Compare commits

..

1 Commits

Author SHA1 Message Date
dgtlmoon
309f36cdd6 Adding contributors section 2024-01-17 11:41:28 +01:00
26 changed files with 62 additions and 208 deletions

View File

@@ -1,10 +0,0 @@
version: 2
updates:
- package-ecosystem: github-actions
directory: /
schedule:
interval: "weekly"
groups:
all:
patterns:
- "*"

View File

@@ -34,7 +34,7 @@ jobs:
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@v3
uses: github/codeql-action/init@v2
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
@@ -45,7 +45,7 @@ jobs:
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
uses: github/codeql-action/autobuild@v3
uses: github/codeql-action/autobuild@v2
# Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl
@@ -59,4 +59,4 @@ jobs:
# make release
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v3
uses: github/codeql-action/analyze@v2

View File

@@ -41,7 +41,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Set up Python 3.11
uses: actions/setup-python@v5
uses: actions/setup-python@v4
with:
python-version: 3.11

View File

@@ -9,7 +9,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v4
with:
python-version: "3.x"
- name: Install pypa/build
@@ -21,7 +21,7 @@ jobs:
- name: Build a binary wheel and a source tarball
run: python3 -m build
- name: Store the distribution packages
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v3
with:
name: python-package-distributions
path: dist/
@@ -34,7 +34,7 @@ jobs:
- build
steps:
- name: Download all the dists
uses: actions/download-artifact@v4
uses: actions/download-artifact@v3
with:
name: python-package-distributions
path: dist/
@@ -64,7 +64,7 @@ jobs:
steps:
- name: Download all the dists
uses: actions/download-artifact@v4
uses: actions/download-artifact@v3
with:
name: python-package-distributions
path: dist/

View File

@@ -26,7 +26,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Set up Python 3.11
uses: actions/setup-python@v5
uses: actions/setup-python@v4
with:
python-version: 3.11

View File

@@ -11,7 +11,7 @@ jobs:
# Mainly just for lint/flake8
- name: Set up Python 3.11
uses: actions/setup-python@v5
uses: actions/setup-python@v4
with:
python-version: '3.11'

View File

@@ -1,7 +1,6 @@
recursive-include changedetectionio/api *
recursive-include changedetectionio/blueprint *
recursive-include changedetectionio/model *
recursive-include changedetectionio/plugins *
recursive-include changedetectionio/processors *
recursive-include changedetectionio/res *
recursive-include changedetectionio/static *

View File

@@ -2,7 +2,7 @@
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
__version__ = '0.45.13'
__version__ = '0.45.12'
from distutils.util import strtobool
from json.decoder import JSONDecodeError

View File

@@ -133,7 +133,6 @@ class WatchHistory(Resource):
# Get a list of available history for a watch by UUID
# curl http://localhost:5000/api/v1/watch/<string:uuid>/history
@auth.check_token
def get(self, uuid):
"""
@api {get} /api/v1/watch/<string:uuid>/history Get a list of all historical snapshots available for a watch

View File

@@ -101,7 +101,6 @@ class Fetcher():
error = None
fetcher_description = "No description"
headers = {}
is_plaintext = None
instock_data = None
instock_data_js = ""
status_code = None

View File

@@ -1,6 +1,6 @@
#!/usr/bin/python3
from changedetectionio import queuedWatchMetaData, html_tools, __version__
from changedetectionio import queuedWatchMetaData
from copy import deepcopy
from distutils.util import strtobool
from feedgen.feed import FeedGenerator
@@ -35,6 +35,8 @@ from flask import (
)
from flask_paginate import Pagination, get_page_parameter
from changedetectionio import html_tools, __version__
from changedetectionio.api import api_v1
datastore = None
@@ -48,18 +50,6 @@ extra_stylesheets = []
update_q = queue.PriorityQueue()
notification_q = queue.Queue()
def get_plugin_manager():
    """Build and return a pluggy plugin manager for the "changedetectionio_plugin" project.

    The hook specifications are added first, then any plugins advertised via
    setuptools entry points are loaded, and finally the bundled whois plugin
    module is registered.

    :return: the configured pluggy.PluginManager (a new one on every call)
    """
    # Imports are resolved at call time, in the same order as before
    import pluggy
    from changedetectionio.plugins import hookspecs, whois as whois_plugin

    project_name = "changedetectionio_plugin"

    manager = pluggy.PluginManager(project_name)
    manager.add_hookspecs(hookspecs)
    manager.load_setuptools_entrypoints(project_name)
    manager.register(whois_plugin)

    return manager
app = Flask(__name__,
static_url_path="",
static_folder="static",
@@ -106,6 +96,7 @@ def init_app_secret(datastore_path):
return secret
@app.template_global()
def get_darkmode_state():
css_dark_mode = request.cookies.get('css_dark_mode', 'false')
@@ -638,6 +629,7 @@ def changedetection_app(config=None, datastore_o=None):
form.fetch_backend.choices.append(p)
form.fetch_backend.choices.append(("system", 'System settings default'))
# form.browser_steps[0] can be assumed that we 'goto url' first
if datastore.proxy_list is None:
@@ -738,8 +730,6 @@ def changedetection_app(config=None, datastore_o=None):
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
is_html_webdriver = True
processor_config = next((p[2] for p in processors.available_processors() if p[0] == watch.get('processor')), None)
# Only works reliably with Playwright
visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and is_html_webdriver
output = render_template("edit.html",
@@ -754,7 +744,6 @@ def changedetection_app(config=None, datastore_o=None):
is_html_webdriver=is_html_webdriver,
jq_support=jq_support,
playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False),
processor_config=processor_config,
settings_application=datastore.data['settings']['application'],
using_global_webdriver_wait=default['webdriver_delay'] is None,
uuid=uuid,
@@ -835,14 +824,11 @@ def changedetection_app(config=None, datastore_o=None):
flash("An error occurred, please see below.", "error")
output = render_template("settings.html",
api_key=datastore.data['settings']['application'].get('api_access_token'),
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
form=form,
hide_remove_pass=os.getenv("SALTED_PASS", False),
settings_application=datastore.data['settings']['application'],
plugins=[]
)
api_key=datastore.data['settings']['application'].get('api_access_token'),
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
settings_application=datastore.data['settings']['application'])
return output

View File

@@ -410,7 +410,7 @@ class quickWatchForm(Form):
url = fields.URLField('URL', validators=[validateURL()])
tags = StringTagUUID('Group tag', [validators.Optional()])
watch_submit_button = SubmitField('Watch', render_kw={"class": "pure-button pure-button-primary"})
processor = RadioField(u'Processor', choices=[t[:2] for t in processors.available_processors()], default="text_json_diff")
processor = RadioField(u'Processor', choices=processors.available_processors(), default="text_json_diff")
edit_and_watch_submit_button = SubmitField('Edit > Watch', render_kw={"class": "pure-button pure-button-primary"})
@@ -427,7 +427,7 @@ class commonSettingsForm(Form):
message="Should contain one or more seconds")])
class importForm(Form):
from . import processors
processor = RadioField(u'Processor', choices=[t[:2] for t in processors.available_processors()], default="text_json_diff")
processor = RadioField(u'Processor', choices=processors.available_processors(), default="text_json_diff")
urls = TextAreaField('URLs')
xlsx_file = FileField('Upload .xlsx file', validators=[FileAllowed(['xlsx'], 'Must be .xlsx file!')])
file_mapping = SelectField('File mapping', [validators.DataRequired()], choices={('wachete', 'Wachete mapping'), ('custom','Custom mapping')})

View File

@@ -38,7 +38,6 @@ class model(dict):
'notification_format': default_notification_format,
'notification_title': default_notification_title,
'notification_urls': [], # Apprise URL list
'plugins': [], # list of dict, keyed by plugin name, with dict of the config and enabled true/false
'pager_size': 50,
'password': False,
'render_anchor_tag_content': False,

View File

@@ -1,6 +0,0 @@
import pluggy
hookimpl = pluggy.HookimplMarker("changedetectionio_plugin")
"""Marker to be imported and used in plugins (and for own implementations)"""
x=1

View File

@@ -1,20 +0,0 @@
import pluggy
from changedetectionio.store import ChangeDetectionStore
hookspec = pluggy.HookspecMarker("changedetectionio_plugin")
@hookspec
def extra_processor():
    """Hook specification: advertise an additional processor.

    :return: a tuple, (machine_name, description)
             NOTE(review): the bundled whois plugin also returns a third element
             (a processor config dict) - confirm the intended tuple shape.
    """
@hookspec(firstresult=True)
def processor_call(processor_name: str, datastore: ChangeDetectionStore, watch_uuid: str):
    """
    Hook specification: call the processor that matches the given processor name.

    :param processor_name: machine name of the processor, as advertised by extra_processor
    :param datastore: the ChangeDetectionStore handed on to the processor
    :param watch_uuid: UUID of the watch to be processed
    :return: presumably the processor object that should handle the watch, or None
             when the implementation does not handle processor_name - TODO confirm
    """

View File

@@ -1,53 +0,0 @@
"""
Whois information lookup
- Fetches using whois
- Extends the 'text_json_diff' so that text filters can still be used with whois information
@todo publish to pypi and github as a separate plugin
"""
from ..plugins import hookimpl
import changedetectionio.processors.text_json_diff as text_json_diff
from changedetectionio import content_fetcher
# would be changedetectionio.plugins in other apps
class text_json_filtering_whois(text_json_diff.perform_site_check):
    """Site check that uses the whois record of the watched host as its content.

    Extends the 'text_json_diff' check so that text filters can still be
    used with the whois information.
    """

    def __init__(self, *args, datastore, watch_uuid, **kwargs):
        """Pass everything straight through to the text_json_diff site check."""
        super().__init__(*args, datastore=datastore, watch_uuid=watch_uuid, **kwargs)

    def call_browser(self):
        """Fill self.fetcher with the whois text for the hostname of the watch link."""
        from urllib.parse import urlparse
        import whois

        # A bare Fetcher only carries the content here; it is flagged as plaintext
        self.fetcher = content_fetcher.Fetcher()
        self.fetcher.is_plaintext = True

        # Only the hostname part of the watch link is looked up
        hostname = urlparse(self.watch.link).hostname
        whois_record = whois.whois(hostname)
        self.fetcher.content = whois_record.text
@hookimpl
def extra_processor():
    """
    Advertise the whois processor

    :return: tuple of (machine name, description, processor config dict)
    """
    from changedetectionio.processors import default_processor_config

    # Copy of the defaults, with the UI elements that are not used switched off
    whois_processor_config = {
        **default_processor_config,
        'needs_request_fetch_method': False,
        'needs_browsersteps': False,
        'needs_visualselector': False,
    }

    return ('plugin_processor_whois', "Whois domain information fetch", whois_processor_config)
# @todo When a watch chooses this extra_process processor, the watch should ONLY use this one.
# (one watch can only have one extra_processor)
@hookimpl
def processor_call(processor_name, datastore, watch_uuid):
    """
    Hand back the whois site check when this plugin's processor is requested

    :param processor_name: machine name of the requested processor
    :param datastore: passed through to the site check
    :param watch_uuid: passed through to the site check
    :return: a text_json_filtering_whois instance, or None for any other processor name
    """
    # This name check could be removed, see the @todo note above
    if processor_name != 'plugin_processor_whois':
        return None

    return text_json_filtering_whois(datastore=datastore, watch_uuid=watch_uuid)

View File

@@ -7,15 +7,6 @@ from copy import deepcopy
from distutils.util import strtobool
from loguru import logger
# Which UI elements in settings the processor requires
# For example, restock monitor isn't compatible with visualselector and filters
default_processor_config = {
'needs_request_fetch_method': True,
'needs_browsersteps': True,
'needs_visualselector': True,
'needs_filters': True,
}
class difference_detection_processor():
browser_steps = None
@@ -141,15 +132,6 @@ class difference_detection_processor():
def available_processors():
from . import restock_diff, text_json_diff
from ..flask_app import get_plugin_manager
pm = get_plugin_manager()
x = [('text_json_diff', text_json_diff.name, dict(default_processor_config)),
('restock_diff', restock_diff.name, dict(default_processor_config))
]
plugin_choices = pm.hook.extra_processor()
if plugin_choices:
for p in plugin_choices:
x.append(p)
x=[('text_json_diff', text_json_diff.name), ('restock_diff', restock_diff.name)]
# @todo Make this smarter with introspection of sorts.
return x

View File

@@ -155,7 +155,7 @@ class perform_site_check(difference_detection_processor):
html_content = self.fetcher.content
# If not JSON, and if it's not text/plain..
if 'text/plain' in self.fetcher.get_all_headers().get('content-type', '').lower() or self.fetcher.is_plaintext:
if 'text/plain' in self.fetcher.get_all_headers().get('content-type', '').lower():
# Don't run get_text or xpath/css filters on plaintext
stripped_text_from_html = html_content
else:

View File

@@ -1,10 +1,3 @@
// Restock Detector
// (c) Leigh Morresi dgtlmoon@gmail.com
//
// Assumes the product is in stock to begin with, unless the following appears above the fold ;
// - outOfStockTexts appears above the fold (out of stock)
// - negateOutOfStockRegex (really is in stock)
function isItemInStock() {
// @todo Pass these in so the same list can be used in non-JS fetchers
const outOfStockTexts = [
@@ -63,7 +56,6 @@ function isItemInStock() {
'품절'
];
const vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0);
function getElementBaseText(element) {
// .textContent can include text from children which may give the wrong results
// scan only immediate TEXT_NODEs, which will be a child of the element
@@ -74,13 +66,19 @@ function isItemInStock() {
return text.toLowerCase().trim();
}
const negateOutOfStockRegex = new RegExp('([0-9] in stock|add to cart)', 'ig');
const negateOutOfStockRegexs = [
'[0-9] in stock'
]
var negateOutOfStockRegexs_r = [];
for (let i = 0; i < negateOutOfStockRegexs.length; i++) {
negateOutOfStockRegexs_r.push(new RegExp(negateOutOfStockRegexs[0], 'g'));
}
// The out-of-stock or in-stock-text is generally always above-the-fold
// and often below-the-fold is a list of related products that may or may not contain trigger text
// so it's good to filter to just the 'above the fold' elements
// and it should be at least 100px from the top to ignore items in the toolbar, sometimes menu items like "Coming soon" exist
const elementsToScan = Array.from(document.getElementsByTagName('*')).filter(element => element.getBoundingClientRect().top + window.scrollY <= vh && element.getBoundingClientRect().top + window.scrollY >= 100);
const elementsToScan = Array.from(document.getElementsByTagName('*')).filter(element => element.getBoundingClientRect().top + window.scrollY <= window.innerHeight && element.getBoundingClientRect().top + window.scrollY >= 100);
var elementText = "";
@@ -96,8 +94,10 @@ function isItemInStock() {
if (elementText.length) {
// try which ones could mean it's in stock
if (negateOutOfStockRegex.test(elementText)) {
return 'Possibly in stock';
for (let i = 0; i < negateOutOfStockRegexs.length; i++) {
if (negateOutOfStockRegexs_r[i].test(elementText)) {
return 'Possibly in stock';
}
}
}
}

View File

@@ -115,12 +115,6 @@
Warning: Contents of <code>{{ '{{diff}}' }}</code>, <code>{{ '{{diff_removed}}' }}</code>, and <code>{{ '{{diff_added}}' }}</code> depend on how the difference algorithm perceives the change. <br>
For example, an addition or removal could be perceived as a change in some cases. <a target="_new" href="https://github.com/dgtlmoon/changedetection.io/wiki/Using-the-%7B%7Bdiff%7D%7D,-%7B%7Bdiff_added%7D%7D,-and-%7B%7Bdiff_removed%7D%7D-notification-tokens">More Here</a> <br>
</p>
<p>
For JSON payloads, use <strong>|tojson</strong> without quotes for automatic escaping, for example - <code>{ "name": {{ '{{ watch_title|tojson }}' }} }</code>
</p>
<p>
URL encoding, use <strong>|urlencode</strong>, for example - <code>gets://hook-website.com/test.php?title={{ '{{ watch_title|urlencode }}' }}</code>
</p>
</div>
</div>
<div class="pure-control-group">

View File

@@ -39,15 +39,12 @@
<ul>
<li class="tab" id=""><a href="#general">General</a></li>
<li class="tab"><a href="#request">Request</a></li>
{% if playwright_enabled and processor_config['needs_browsersteps'] %}
{% if playwright_enabled %}
<li class="tab"><a id="browsersteps-tab" href="#browser-steps">Browser Steps</a></li>
{% endif %}
{% if processor_config['needs_visualselector'] %}
{% if watch['processor'] == 'text_json_diff' %}
<li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li>
{% endif %}
{% if processor_config['needs_filters'] %}
<li class="tab"><a href="#filters-and-triggers">Filters &amp; Triggers</a></li>
{% endif %}
@@ -70,12 +67,16 @@
{{ render_field(form.url, placeholder="https://...", required=true, class="m-d") }}
<span class="pure-form-message-inline">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></span><br>
<span class="pure-form-message-inline">You can use variables in the URL, perfect for inserting the current date and other logic, <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a></span><br>
</div>
<div class="pure-control-group">
<label for="title">Processing mode</label>
{% for a in available_processors %}
<a href="{{url_for('edit_page', uuid=uuid)}}?switch_processor={{ a[0] }}" class="pure-button button-xsmall {% if watch['processor'] == a[0] %}button-secondary{% endif %}">{{ a[1]}}.</a>
{% endfor %}
<span class="pure-form-message-inline">
{% if watch['processor'] == 'text_json_diff' %}
Current mode: <strong>Webpage Text/HTML, JSON and PDF changes.</strong><br>
<a href="{{url_for('edit_page', uuid=uuid)}}?switch_processor=restock_diff" class="pure-button button-xsmall">Switch to re-stock detection mode.</a>
{% else %}
Current mode: <strong>Re-stock detection.</strong><br>
<a href="{{url_for('edit_page', uuid=uuid)}}?switch_processor=text_json_diff" class="pure-button button-xsmall">Switch to Webpage Text/HTML, JSON and PDF changes mode.</a>
{% endif %}
</span>
</div>
<div class="pure-control-group">
{{ render_field(form.title, class="m-d") }}
@@ -107,7 +108,6 @@
</div>
<div class="tab-pane-inner" id="request">
{% if processor_config['needs_request_fetch_method'] %}
<div class="pure-control-group inline-radio">
{{ render_field(form.fetch_backend, class="fetch-backend") }}
<span class="pure-form-message-inline">
@@ -116,7 +116,6 @@
Tip: <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">Connect using Bright Data and Oxylabs Proxies, find out more here.</a>
</span>
</div>
{% endif %}
{% if form.proxy %}
<div class="pure-control-group inline-radio">
<div>{{ form.proxy.label }} <a href="" id="check-all-proxies" class="pure-button button-secondary button-xsmall" >Check/Scan all</a></div>
@@ -194,7 +193,7 @@ User-Agent: wonderbra 1.0") }}
</div>
</fieldset>
</div>
{% if playwright_enabled and processor_config['needs_browsersteps'] %}
{% if playwright_enabled %}
<div class="tab-pane-inner" id="browser-steps">
<img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}" alt="New beta functionality">
<fieldset>
@@ -265,10 +264,8 @@ User-Agent: wonderbra 1.0") }}
</fieldset>
</div>
{% if processor_config['needs_filters'] %}
{% if watch['processor'] == 'text_json_diff' %}
<div class="tab-pane-inner" id="filters-and-triggers">
<div class="text-filtering">
<h3>Filter by HTML element</h3>
<div class="pure-control-group">
<strong>Pro-tips:</strong><br>
<ul>
@@ -318,7 +315,7 @@ xpath://body/div/span[contains(@class, 'example-class')]",
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br>
</span>
</div>
<div class="pure-control-group">
<fieldset class="pure-control-group">
{{ render_field(form.subtractive_selectors, rows=5, placeholder="header
footer
nav
@@ -329,8 +326,7 @@ nav
<li> Add multiple elements or CSS selectors per line to ignore multiple parts of the HTML. </li>
</ul>
</span>
</div>
</div>
</fieldset>
<div class="text-filtering">
<fieldset class="pure-group" id="text-filtering-type-options">
<h3>Text filtering</h3>
@@ -427,7 +423,7 @@ Unavailable") }}
</div>
{% endif %}
{% if processor_config['needs_visualselector'] %}
{% if watch['processor'] == 'text_json_diff' %}
<div class="tab-pane-inner visual-selector-ui" id="visualselector">
<img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}" alt="New beta functionality">

View File

@@ -22,7 +22,6 @@
<li class="tab"><a href="#filters">Global Filters</a></li>
<li class="tab"><a href="#api">API</a></li>
<li class="tab"><a href="#proxies">CAPTCHA &amp; Proxies</a></li>
<li class="tab"><a href="#plugins">Plugins</a></li>
</ul>
</div>
<div class="box-wrap inner">
@@ -244,12 +243,6 @@ nav
{{ render_field(form.requests.form.extra_browsers) }}
</div>
</div>
<div class="tab-pane-inner" id="plugins">
available plugin on/off stuff here
how to let each one expose config?
</div>
<div id="actions">
<div class="pure-control-group">
{{ render_button(form.save_button) }}

View File

@@ -1,8 +1,8 @@
#!/usr/bin/python3
from .util import set_original_response, live_server_setup, wait_for_all_checks
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
from flask import url_for
import io
from urllib.request import urlopen
from zipfile import ZipFile
import re
import time
@@ -37,10 +37,15 @@ def test_backup(client, live_server):
# Should be PK/ZIP stream
assert res.data.count(b'PK') >= 2
backup = ZipFile(io.BytesIO(res.data))
l = backup.namelist()
# ZipFile from buffer seems non-obvious, just save it instead
with open("download.zip", 'wb') as f:
f.write(res.data)
zip = ZipFile('download.zip')
l = zip.namelist()
uuid4hex = re.compile('^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}.*txt', re.I)
newlist = list(filter(uuid4hex.match, l)) # Read Note below
# Should be two txt files in the archive (history and the snapshot)
assert len(newlist) == 2

View File

@@ -259,13 +259,6 @@ class update_worker(threading.Thread):
update_handler = restock_diff.perform_site_check(datastore=self.datastore,
watch_uuid=uuid
)
elif processor.startswith('plugin_processor_'):
from .flask_app import get_plugin_manager
pm = get_plugin_manager()
x = pm.hook.processor_call(processor_name=processor, datastore=self.datastore, watch_uuid=uuid)
if x:
update_handler = x
else:
# Used as a default and also by some tests
update_handler = text_json_diff.perform_site_check(datastore=self.datastore,

View File

@@ -95,7 +95,6 @@ services:
# Used for fetching pages via Playwright+Chrome where you need Javascript support.
# Note: works well but is deprecated, does not fetch full page screenshots (doesn't work with Visual Selector) and has other issues
# More information about the advantages of playwright/browserless https://www.browserless.io/blog/2023/12/13/migrating-selenium-to-playwright/
# browser-chrome:
# hostname: browser-chrome
# image: selenium/standalone-chrome:4

View File

@@ -73,5 +73,4 @@ pytest-flask ~=1.2
# Pin jsonschema version to prevent build errors on armv6 while rpds-py wheels aren't available (1708)
jsonschema==4.17.3
pluggy
loguru