Compare commits

...

56 Commits

Author SHA1 Message Date
dgtlmoon
a6eff9977a Maybe this solves cryptography <-> rustc 2024-06-18 11:13:00 +02:00
dgtlmoon
7164f33967 Merge branch 'master' into 2039-restock-use-itemprop 2024-06-18 10:27:43 +02:00
dgtlmoon
43ddfa5005 Add delay 2024-06-17 18:42:06 +02:00
dgtlmoon
4ba847e481 hmm 2024-06-17 16:40:13 +02:00
dgtlmoon
f9f83e6663 hmm 2024-06-17 16:16:07 +02:00
dgtlmoon
d135240bd0 store everything 2024-06-17 15:24:54 +02:00
dgtlmoon
9b8b76084a add delay for GH 2024-06-17 14:41:54 +02:00
dgtlmoon
a59a674f7f Merge branch 'master' into 2039-restock-use-itemprop 2024-06-17 13:38:47 +02:00
dgtlmoon
8915171b84 not used 2024-06-17 11:50:10 +02:00
dgtlmoon
7aa1e5cc00 Merge branch 'master' into 2039-restock-use-itemprop 2024-06-17 11:49:55 +02:00
dgtlmoon
dcd000e2d3 add check delays 2024-06-14 14:36:37 +02:00
dgtlmoon
6824fa22c5 workaround for no previous price present 2024-06-13 18:00:11 +02:00
dgtlmoon
afc88d654b Merge branch 'master' into 2039-restock-use-itemprop 2024-06-13 16:53:46 +02:00
dgtlmoon
b39c770f84 adding % threshold handling 2024-06-12 18:29:44 +02:00
dgtlmoon
575bdcfbe8 WIP 2024-06-12 18:11:20 +02:00
dgtlmoon
32579d7800 fix debug message 2024-06-12 16:44:20 +02:00
dgtlmoon
37a021a701 fix styling 2024-06-12 16:43:10 +02:00
dgtlmoon
b9b0a9260d fix 2024-06-12 16:39:09 +02:00
dgtlmoon
99f7f78798 fix test 2024-06-12 16:31:46 +02:00
dgtlmoon
e756002b9a min/mnax price check 2024-06-12 15:25:21 +02:00
dgtlmoon
cf43852645 Fixing test setup 2024-06-12 14:44:45 +02:00
dgtlmoon
ded4495801 Adding follow_price_changes 2024-06-12 14:43:54 +02:00
dgtlmoon
18f80e1592 abstract out the extra tab 2024-06-12 10:32:21 +02:00
dgtlmoon
e1c903be0c part 1/2 - Abstract out form handling for the form class 2024-06-11 17:26:06 +02:00
dgtlmoon
f983ec2ff0 Merge branch 'master' into 2039-restock-use-itemprop 2024-06-07 13:56:33 +02:00
dgtlmoon
6024d020be Merge branch 'master' into 2039-restock-use-itemprop 2024-06-05 12:32:55 +02:00
dgtlmoon
f5dba66c48 small fix 2024-06-04 15:28:48 +02:00
dgtlmoon
fa6209687a Merge branch 'master' into 2039-restock-use-itemprop 2024-06-04 14:39:57 +02:00
dgtlmoon
d0acc59a13 Merge branch 'master' into 2039-restock-use-itemprop 2024-06-04 12:12:01 +02:00
dgtlmoon
c1a0481ec0 skip rdfa? 2024-05-23 09:59:22 +02:00
dgtlmoon
d1528bbe89 Misc tweaks 2024-05-22 16:14:41 +02:00
dgtlmoon
929044581b tweak imports 2024-05-22 15:55:30 +02:00
dgtlmoon
df6d120c4f Merge branch 'master' into 2039-restock-use-itemprop 2024-05-22 15:46:47 +02:00
dgtlmoon
17c87f494d Merge branch 'ui-search-error-messages' into 2039-restock-use-itemprop 2024-05-10 18:15:36 +02:00
dgtlmoon
9ffa4eda3d UI - Search should scan/search error messages 2024-05-10 17:59:29 +02:00
dgtlmoon
d38bb6167b big refactor 2024-05-10 17:52:28 +02:00
dgtlmoon
32e074da2b Merge branch 'master' into 2039-restock-use-itemprop 2024-05-07 15:24:33 +02:00
dgtlmoon
ceeb4d54b7 tweak styles 2024-05-07 15:16:23 +02:00
dgtlmoon
8680a29777 UI tweaks 2024-05-07 15:01:41 +02:00
dgtlmoon
41a6c608b1 test note needed 2024-05-07 11:42:09 +02:00
dgtlmoon
b1170c1e33 WIP 2024-05-07 11:34:28 +02:00
dgtlmoon
0b85990d28 more work 2024-05-06 16:20:08 +02:00
dgtlmoon
345b54d401 add more test examples 2024-05-03 14:53:35 +02:00
dgtlmoon
2aa8ca0e58 tweaks 2024-05-03 14:47:13 +02:00
dgtlmoon
fe8b7b8162 Adding test 2024-05-03 11:07:40 +02:00
dgtlmoon
86f1d8bc0a Make it testable 2024-05-03 09:18:41 +02:00
dgtlmoon
49246bcaf9 Merge branch '2039-restock-use-itemprop' of github.com:dgtlmoon/changedetection.io into 2039-restock-use-itemprop 2024-05-03 09:16:33 +02:00
dgtlmoon
37d081cc43 Merge branch 'master' into 2039-restock-use-itemprop 2024-05-03 09:15:54 +02:00
dgtlmoon
b71b457c24 Merge branch 'master' into 2039-restock-use-itemprop 2024-01-14 18:26:24 +01:00
dgtlmoon
61185b5514 Tidy imports 2023-12-09 13:21:21 +01:00
dgtlmoon
69513c47cd Offer more helpful text 2023-12-09 13:17:57 +01:00
dgtlmoon
8090d0dac6 cleanup 2023-12-09 13:08:52 +01:00
dgtlmoon
0f9d3a0dcf logic fixes and force faster xpath1 2023-12-09 13:05:40 +01:00
dgtlmoon
a16b129731 Simplify logic 2023-12-09 12:56:02 +01:00
dgtlmoon
d58c24db44 Set false text also 2023-12-08 17:37:05 +01:00
dgtlmoon
436c412faf Re #2039 - Use itemprop where available 2023-12-08 17:16:05 +01:00
25 changed files with 794 additions and 449 deletions

View File

@@ -93,7 +93,7 @@ jobs:
- name: Playwright and SocketPuppetBrowser - Headers and requests
run: |
# Settings headers playwright tests - Call back in from Sockpuppetbrowser, check headers
docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'
docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'find .; cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py; pwd;find .'
- name: Playwright and SocketPuppetBrowser - Restock detection
run: |
@@ -231,9 +231,9 @@ jobs:
docker logs test-cdio-basic-tests > output-logs/test-cdio-basic-tests-stdout-${{ env.PYTHON_VERSION }}.txt
docker logs test-cdio-basic-tests 2> output-logs/test-cdio-basic-tests-stderr-${{ env.PYTHON_VERSION }}.txt
- name: Store container log
- name: Store everything including test-datastore
if: always()
uses: actions/upload-artifact@v4
with:
name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }}
path: output-logs
path: .

View File

@@ -26,7 +26,8 @@ WORKDIR /install
COPY requirements.txt /requirements.txt
RUN pip install --target=/dependencies -r /requirements.txt
# --extra-index-url https://www.piwheels.org/simple is for cryptography module to be prebuilt (or rustc etc needs to be installed)
RUN pip install --extra-index-url https://www.piwheels.org/simple --target=/dependencies -r /requirements.txt
# Playwright is an alternative to Selenium
# Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing

View File

@@ -12,9 +12,10 @@ import copy
# See docs/README.md for rebuilding the docs/apidoc information
from . import api_schema
from ..model import watch_base
# Build a JSON Schema at least partially based on our Watch model
from changedetectionio.model.Watch import base_config as watch_base_config
watch_base_config = watch_base()
schema = api_schema.build_watch_json_schema(watch_base_config)
schema_create_watch = copy.deepcopy(schema)

View File

@@ -17,6 +17,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
# GET /<uuid>/accept - accept price-data tracking for the watch identified by `uuid`,
# queue it for a recheck and send the browser back to the index page.
@price_data_follower_blueprint.route("/<string:uuid>/accept", methods=['GET'])
def accept(uuid):
# Flag the watch as having accepted LD+JSON price tracking
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
# Switch the watch over to the 'restock_diff' processor
datastore.data['watching'][uuid]['processor'] = 'restock_diff'
# Queue the watch with priority 1; 'skip_when_checksum_same' is disabled for this queued check
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
return redirect(url_for("index"))

View File

@@ -103,7 +103,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
default = datastore.data['settings']['application']['tags'].get(uuid)
form = forms.watchForm(formdata=request.form if request.method == 'POST' else None,
form = forms.processor_text_json_diff_form(formdata=request.form if request.method == 'POST' else None,
data=default,
)
form.datastore=datastore # needed?
@@ -126,7 +126,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
default = datastore.data['settings']['application']['tags'].get(uuid)
form = forms.watchForm(formdata=request.form if request.method == 'POST' else None,
form = forms.processor_text_json_diff_form(formdata=request.form if request.method == 'POST' else None,
data=default,
)
# @todo subclass form so validation works

View File

@@ -5,6 +5,9 @@ import os
import queue
import threading
import time
from jinja2 import Template
from .safe_jinja import render as jinja_render
from changedetectionio.strtobool import strtobool
from copy import deepcopy
@@ -616,7 +619,6 @@ def changedetection_app(config=None, datastore_o=None):
@login_optionally_required
# https://stackoverflow.com/questions/42984453/wtforms-populate-form-with-data-if-data-exists
# https://wtforms.readthedocs.io/en/3.0.x/forms/#wtforms.form.Form.populate_obj ?
def edit_page(uuid):
from . import forms
from .blueprint.browser_steps.browser_steps import browser_step_ui_config
@@ -652,9 +654,17 @@ def changedetection_app(config=None, datastore_o=None):
# Radio needs '' not None, or incase that the chosen one no longer exists
if default['proxy'] is None or not any(default['proxy'] in tup for tup in datastore.proxy_list):
default['proxy'] = ''
# proxy_override set to the json/text list of the items
form = forms.watchForm(formdata=request.form if request.method == 'POST' else None,
processor = datastore.data['watching'][uuid].get('processor', '')
form_class_name = f"processor_{processor}_form"
try:
form_class = getattr(forms, form_class_name)
except AttributeError:
flash(f"Cannot load the edit form for processor/plugin '{processor}', plugin missing?", 'error')
return redirect(url_for('index'))
form = form_class(formdata=request.form if request.method == 'POST' else None,
data=default
)
@@ -761,23 +771,38 @@ def changedetection_app(config=None, datastore_o=None):
# Only works reliably with Playwright
visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and is_html_webdriver
template_args = {
'available_processors': processors.available_processors(),
'browser_steps_config': browser_step_ui_config,
'emailprefix': os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
'extra_title': f" - Edit - {watch.label}",
'extra_processor_config': form.extra_tab_content(),
'form': form,
'has_default_notification_urls': True if len(datastore.data['settings']['application']['notification_urls']) else False,
'has_extra_headers_file': len(datastore.get_all_headers_in_textfile_for_watch(uuid=uuid)) > 0,
'has_special_tag_options': _watch_has_tag_options_set(watch=watch),
'is_html_webdriver': is_html_webdriver,
'jq_support': jq_support,
'playwright_enabled': os.getenv('PLAYWRIGHT_DRIVER_URL', False),
'settings_application': datastore.data['settings']['application'],
'using_global_webdriver_wait': not default['webdriver_delay'],
'uuid': uuid,
'visualselector_enabled': visualselector_enabled,
'watch': watch
}
included_content = None
if form.extra_form_content():
# So that the extra panels can access _helpers.html etc
from jinja2 import Environment, FileSystemLoader
env = Environment(loader=FileSystemLoader('changedetectionio/templates'))
template = env.from_string(form.extra_form_content())
included_content = template.render(**template_args)
output = render_template("edit.html",
available_processors=processors.available_processors(),
browser_steps_config=browser_step_ui_config,
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
extra_title=f" - Edit - {watch.label}",
form=form,
has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
has_extra_headers_file=len(datastore.get_all_headers_in_textfile_for_watch(uuid=uuid)) > 0,
has_special_tag_options=_watch_has_tag_options_set(watch=watch),
is_html_webdriver=is_html_webdriver,
jq_support=jq_support,
playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False),
settings_application=datastore.data['settings']['application'],
using_global_webdriver_wait=not default['webdriver_delay'],
uuid=uuid,
visualselector_enabled=visualselector_enabled,
watch=watch
extra_tab_content=form.extra_tab_content() if form.extra_tab_content() else None,
extra_form_content=included_content,
**template_args
)
return output

View File

@@ -1,5 +1,8 @@
import os
import re
from wtforms.fields.numeric import FloatField
from changedetectionio.strtobool import strtobool
from wtforms import (
@@ -447,7 +450,7 @@ class SingleBrowserStep(Form):
# remove_button = SubmitField('-', render_kw={"type": "button", "class": "pure-button pure-button-primary", 'title': 'Remove'})
# add_button = SubmitField('+', render_kw={"type": "button", "class": "pure-button pure-button-primary", 'title': 'Add new step after'})
class watchForm(commonSettingsForm):
class processor_text_json_diff_form(commonSettingsForm):
url = fields.URLField('URL', validators=[validateURL()])
tags = StringTagUUID('Group tag', [validators.Optional()], default='')
@@ -475,9 +478,6 @@ class watchForm(commonSettingsForm):
filter_text_replaced = BooleanField('Replaced/changed lines', default=True)
filter_text_removed = BooleanField('Removed lines', default=True)
# @todo this class could be moved to its own text_json_diff_watchForm and this goes to restock_diff_Watchform perhaps
in_stock_only = BooleanField('Only trigger when product goes BACK to in-stock', default=True)
trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
if os.getenv("PLAYWRIGHT_DRIVER_URL"):
browser_steps = FieldList(FormField(SingleBrowserStep), min_entries=10)
@@ -493,6 +493,12 @@ class watchForm(commonSettingsForm):
notification_muted = BooleanField('Notifications Muted / Off', default=False)
notification_screenshot = BooleanField('Attach screenshot to notification (where possible)', default=False)
# Hook for processor-specific forms: title of an extra tab on the edit page.
# This form has no extra tab, so it returns None.
def extra_tab_content(self):
return None
# Hook for processor-specific forms: template source for the body of the extra tab.
# This form has no extra content, so it returns None.
def extra_form_content(self):
return None
def validate(self, **kwargs):
if not super().validate():
return False
@@ -514,6 +520,58 @@ class watchForm(commonSettingsForm):
return result
# Edit form for the 'restock_diff' processor: everything from the text/JSON diff form
# plus the restock and price-change options below.
class processor_restock_diff_form(processor_text_json_diff_form):
in_stock_only = BooleanField('Only trigger when product goes BACK to in-stock', default=True)
# Optional price bounds - may be left empty (placeholder shows "No limit")
price_change_min = FloatField('Minimum amount to trigger notification', [validators.Optional()], render_kw={"placeholder": "No limit", "size": "10"})
price_change_max = FloatField('Maximum amount to trigger notification', [validators.Optional()], render_kw={"placeholder": "No limit", "size": "10"})
# Optional percentage, validated to be within 0..100 when supplied
price_change_threshold_percent = FloatField('Threshold in % for price changes', validators=[
validators.Optional(),
validators.NumberRange(min=0, max=100, message="Should be between 0 and 100"),
], render_kw={"placeholder": "0%", "size": "5"})
follow_price_changes = BooleanField('Follow price changes', default=False)
# Title of the extra tab provided by this form
def extra_tab_content(self):
return 'Restock & Price Detection'
# Jinja2 template source for the body of the extra tab; it expects `form` in its render
# context and imports its field macros from '_helpers.html'
def extra_form_content(self):
return """
{% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
<script>
$(document).ready(function () {
toggleOpacity('#follow_price_changes', '.price-change-minmax', true);
});
</script>
<fieldset>
<div class="pure-control-group">
<fieldset class="pure-group">
{{ render_checkbox_field(form.in_stock_only) }}
<span class="pure-form-message-inline">Only trigger notifications when page changes from <strong>out of stock</strong> to <strong>back in stock</strong></span>
</fieldset>
<fieldset class="pure-group">
{{ render_checkbox_field(form.follow_price_changes) }}
<span class="pure-form-message-inline">Changes in price should trigger a notification</span>
<span class="pure-form-message-inline">When OFF - only care about restock detection</span>
</fieldset>
<fieldset class="pure-group price-change-minmax">
{{ render_field(form.price_change_min) }}
<span class="pure-form-message-inline">Minimum amount, only trigger a change when the price is less than this amount.</span>
</fieldset>
<fieldset class="pure-group price-change-minmax">
{{ render_field(form.price_change_max) }}
<span class="pure-form-message-inline">Maximum amount, only trigger a change when the price is more than this amount.</span>
</fieldset>
<fieldset class="pure-group price-change-minmax">
{{ render_field(form.price_change_threshold_percent) }}
<span class="pure-form-message-inline">Price must change more than this % to trigger a change.</span><br>
<span class="pure-form-message-inline">For example, If the product is $1,000 USD, <strong>2%</strong> would mean it has to change more than $20 since the last check.</span><br>
</fieldset>
</div>
</fieldset>"""
class SingleExtraProxy(Form):
# maybe better to set some <script>var..

View File

@@ -240,7 +240,7 @@ def _get_stripped_text_from_json_match(match):
# ensure_is_ldjson_info_type - str "product", optional, "@type == product" (I dont know how to do that as a json selector)
def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None):
stripped_text_from_html = False
# https://github.com/dgtlmoon/changedetection.io/pull/2041#issuecomment-1848397161w
# Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded within HTML tags
try:
stripped_text_from_html = _parse_json(json.loads(content), json_filter)
@@ -279,17 +279,19 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
if isinstance(json_data, dict):
# If it has LD JSON 'key' @type, and @type is 'product', and something was found for the search
# (Some sites have multiple of the same ld+json @type='product', but some have the review part, some have the 'price' part)
# @type could also be a list (Product, SubType)
# @type could also be a list although non-standard ("@type": ["Product", "SubType"],)
# LD_JSON auto-extract also requires some content PLUS the ldjson to be present
# 1833 - could be either str or dict, should not be anything else
if json_data.get('@type') and stripped_text_from_html:
try:
if json_data.get('@type') == str or json_data.get('@type') == dict:
types = [json_data.get('@type')] if isinstance(json_data.get('@type'), str) else json_data.get('@type')
if ensure_is_ldjson_info_type.lower() in [x.lower().strip() for x in types]:
break
except:
continue
t = json_data.get('@type')
if t and stripped_text_from_html:
if isinstance(t, str) and t.lower() == ensure_is_ldjson_info_type.lower():
break
# The non-standard part, some have a list
elif isinstance(t, list):
if ensure_is_ldjson_info_type.lower() in [x.lower().strip() for x in t]:
break
elif stripped_text_from_html:
break

View File

@@ -1,19 +1,14 @@
from .Watch import base_config
import uuid
class model(dict):
from changedetectionio.model import watch_base
class model(watch_base):
def __init__(self, *arg, **kw):
self.update(base_config)
self['uuid'] = str(uuid.uuid4())
super(model, self).__init__(*arg, **kw)
if kw.get('default'):
self.update(kw['default'])
del kw['default']
# Goes at the end so we update the default object with the initialiser
super(model, self).__init__(*arg, **kw)

View File

@@ -1,6 +1,6 @@
from changedetectionio.strtobool import strtobool
from changedetectionio.safe_jinja import render as jinja_render
from . import watch_base
import os
import re
import time
@@ -15,69 +15,6 @@ SAFE_PROTOCOL_REGEX='^(http|https|ftp|file):'
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
from changedetectionio.notification import (
default_notification_format_for_watch
)
base_config = {
'body': None,
'browser_steps': [],
'browser_steps_last_error_step': None,
'check_unique_lines': False, # On change-detected, compare against all history if its something new
'check_count': 0,
'date_created': None,
'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
'extract_text': [], # Extract text by regex after filters
'extract_title_as_title': False,
'fetch_backend': 'system', # plaintext, playwright etc
'fetch_time': 0.0,
'processor': 'text_json_diff', # could be restock_diff or others from .processors
'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
'filter_text_added': True,
'filter_text_replaced': True,
'filter_text_removed': True,
'has_ldjson_price_data': None,
'track_ldjson_price_data': None,
'headers': {}, # Extra headers to send
'ignore_text': [], # List of text to ignore when calculating the comparison checksum
'in_stock' : None,
'in_stock_only' : True, # Only trigger change on going to instock from out-of-stock
'include_filters': [],
'last_checked': 0,
'last_error': False,
'last_viewed': 0, # history key value of the last viewed via the [diff] link
'method': 'GET',
'notification_alert_count': 0,
# Custom notification content
'notification_body': None,
'notification_format': default_notification_format_for_watch,
'notification_muted': False,
'notification_title': None,
'notification_screenshot': False, # Include the latest screenshot if available and supported by the apprise URL
'notification_urls': [], # List of URLs to add to the notification Queue (Usually AppRise)
'paused': False,
'previous_md5': False,
'previous_md5_before_filters': False, # Used for skipping changedetection entirely
'proxy': None, # Preferred proxy connection
'remote_server_reply': None, # From 'server' reply header
'sort_text_alphabetically': False,
'subtractive_selectors': [],
'tag': '', # Old system of text name for a tag, to be removed
'tags': [], # list of UUIDs to App.Tags
'text_should_not_be_present': [], # Text that should not present
# Re #110, so then if this is set to None, we know to use the default value instead
# Requires setting to None on submit if it's the same as the default
# Should be all None by default, so we use the system default in this case.
'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None},
'time_between_check_use_default': True,
'title': None,
'trigger_text': [], # List of text or regex to wait for until a change is detected
'url': '',
'uuid': str(uuid.uuid4()),
'webdriver_delay': None,
'webdriver_js_execute_code': None, # Run before change-detection
}
def is_safe_url(test_url):
# See https://github.com/dgtlmoon/changedetection.io/issues/1358
@@ -94,20 +31,17 @@ def is_safe_url(test_url):
return True
class model(dict):
class model(watch_base):
__newest_history_key = None
__history_n = 0
jitter_seconds = 0
def __init__(self, *arg, **kw):
self.update(base_config)
self.__datastore_path = kw['datastore_path']
self['uuid'] = str(uuid.uuid4())
del kw['datastore_path']
super(model, self).__init__(*arg, **kw)
if kw.get('default'):
self.update(kw['default'])
del kw['default']
@@ -115,9 +49,6 @@ class model(dict):
# Be sure the cached timestamp is ready
bump = self.history
# Goes at the end so we update the default object with the initialiser
super(model, self).__init__(*arg, **kw)
@property
def viewed(self):
# Don't return viewed when last_viewed is 0 and newest_key is 0
@@ -256,6 +187,17 @@ class model(dict):
return has_browser_steps
@property
def has_restock_info(self):
    """Tell whether the stored 'restock' data carries a price or an availability value.

    :return: True when self['restock'] is set and its 'price' or 'availability' is not None,
             otherwise False.
    """
    restock = self.get('restock')
    if not restock:
        # Nothing stored (missing, None or empty)
        return False
    # Identity checks, so falsy-but-present values such as a price of 0 still count as info
    return restock.get('price') is not None or restock.get('availability') is not None
# Returns the newest key, but if theres only 1 record, then it's counted as not being new, so return 0.
@property
def newest_history_key(self):

View File

@@ -0,0 +1,80 @@
import os
import uuid
from changedetectionio import strtobool
from changedetectionio.notification import default_notification_format_for_watch
class Restock(dict):
    """Dict of restock details that always contains the 'in_stock', 'price' and 'currency' keys."""

    # @todo some setter to handle weird prices like "00,01" etc?
    def __init__(self, *args, **kwargs):
        # All-None baseline first, then whatever the caller supplied (any form dict() accepts)
        # is laid over the top of it.
        baseline = {'in_stock': None, 'price': None, 'currency': None}
        supplied = dict(*args, **kwargs)
        super().__init__({**baseline, **supplied})
class watch_base(dict):
    """Dict pre-populated with the default watch settings; constructor arguments override the defaults."""

    def __init__(self, *arg, **kw):
        notify_on_filter_failure = strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True'))

        defaults = {
            'body': None,
            'browser_steps': [],
            'browser_steps_last_error_step': None,
            'check_count': 0,
            'check_unique_lines': False,  # On change-detected, compare against all history if its something new
            'consecutive_filter_failures': 0,  # Every time the CSS/xPath filter cannot be located, reset when all is fine.
            'date_created': None,
            'extract_text': [],  # Extract text by regex after filters
            'extract_title_as_title': False,
            'fetch_backend': 'system',  # plaintext, playwright etc
            'fetch_time': 0.0,
            'filter_failure_notification_send': notify_on_filter_failure,
            'filter_text_added': True,
            'filter_text_removed': True,
            'filter_text_replaced': True,
            'follow_price_changes': True,
            'has_ldjson_price_data': None,
            'headers': {},  # Extra headers to send
            'ignore_text': [],  # List of text to ignore when calculating the comparison checksum
            'in_stock': None,
            'in_stock_only': True,  # Only trigger change on going to instock from out-of-stock
            'include_filters': [],
            'last_checked': 0,
            'last_error': False,
            'last_viewed': 0,  # history key value of the last viewed via the [diff] link
            'method': 'GET',
            'notification_alert_count': 0,
            # Custom notification content
            'notification_body': None,
            'notification_format': default_notification_format_for_watch,
            'notification_muted': False,
            'notification_screenshot': False,  # Include the latest screenshot if available and supported by the apprise URL
            'notification_title': None,
            'notification_urls': [],  # List of URLs to add to the notification Queue (Usually AppRise)
            'paused': False,
            'previous_md5': False,
            'previous_md5_before_filters': False,  # Used for skipping changedetection entirely
            'processor': 'text_json_diff',  # could be restock_diff or others from .processors
            'price_change_threshold_percent': None,
            'proxy': None,  # Preferred proxy connection
            'remote_server_reply': None,  # From 'server' reply header
            'sort_text_alphabetically': False,
            'subtractive_selectors': [],
            'tag': '',  # Old system of text name for a tag, to be removed
            'tags': [],  # list of UUIDs to App.Tags
            'text_should_not_be_present': [],  # Text that should not present
            # Re #110, so then if this is set to None, we know to use the default value instead
            # Requires setting to None on submit if it's the same as the default
            # Should be all None by default, so we use the system default in this case.
            'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None},
            'time_between_check_use_default': True,
            'title': None,
            'track_ldjson_price_data': None,
            'trigger_text': [],  # List of text or regex to wait for until a change is detected
            'url': '',
            'uuid': str(uuid.uuid4()),  # Fresh UUID generated on every construction
            'webdriver_delay': None,
            'webdriver_js_execute_code': None,  # Run before change-detection
        }
        self.update(defaults)

        # Goes last so anything passed to the constructor overrides the defaults set above
        super(watch_base, self).__init__(*arg, **kw)

View File

@@ -1,25 +1,113 @@
from . import difference_detection_processor
from ..model import Restock
from copy import deepcopy
from loguru import logger
import hashlib
import re
import urllib3
import time
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
name = 'Re-stock detection for single product pages'
name = 'Re-stock & Price detection for single product pages'
description = 'Detects if the product goes back to in-stock'
class UnableToExtractRestockData(Exception):
    """Raised when restock data could not be extracted; carries the related status code."""

    def __init__(self, status_code):
        # Initialise the base Exception too, so args/str() reflect the status code
        # however the exception was constructed (positional or keyword).
        super().__init__(status_code)
        # Set this so we can use it in other parts of the app
        self.status_code = status_code
def _search_prop_by_value(matches, value):
for properties in matches:
for prop in properties:
if value in prop[0]:
return prop[1] # Yield the desired value and exit the function
# should return Restock()
# add casting?
def get_itemprop_availability(html_content) -> Restock:
    """
    Find price/currency/availability in any of the many places a page may declare them.

    Uses 'extruct' to collect the dublincore/json-ld/microdata/microformat/opengraph metadata
    of the page, then searches that structure with jsonpath for the first price, currency and
    availability entries. Whatever is still missing afterwards is looked up in the OpenGraph
    style 'properties' lists.

    :param html_content: HTML of the page to scan.
    :return: Restock built from whichever of 'price', 'currency' and 'availability' were found.
    """
    from jsonpath_ng import parse

    now = time.time()
    import extruct
    logger.trace(f"Imported extruct module in {time.time() - now:.3f}s")

    value = {}
    now = time.time()
    # Extruct is very slow, I'm wondering if some ML is going to be faster (800ms on my i7), 'rdfa' seems to be the heaviest.
    syntaxes = ['dublincore', 'json-ld', 'microdata', 'microformat', 'opengraph']

    data = extruct.extract(html_content, syntaxes=syntaxes)
    logger.trace(f"Extruct basic extract of all metadata done in {time.time() - now:.3f}s")

    # First phase, dead simple scanning of anything that looks useful
    if data:
        logger.debug(f"Using jsonpath to find price/availability/etc")
        price_parse = parse('$..(price|Price)')
        pricecurrency_parse = parse('$..(pricecurrency|currency| priceCurrency )')
        availability_parse = parse('$..(availability|Availability)')

        price_result = price_parse.find(data)
        if price_result:
            value['price'] = price_result[0].value

        pricecurrency_result = pricecurrency_parse.find(data)
        if pricecurrency_result:
            value['currency'] = pricecurrency_result[0].value

        availability_result = availability_parse.find(data)
        if availability_result:
            value['availability'] = availability_result[0].value

        availability = value.get('availability')
        if availability:
            # Strip quoting and the schema.org URL prefix, e.g. "http://schema.org/InStock" -> "instock"
            value['availability'] = re.sub(r'(?i)^(https|http)://schema.org/', '',
                                           availability.strip(' "\'').lower())

        # Second, go dig OpenGraph which is something that jsonpath_ng cant do because of the tuples and double-dots (:)
        # Run whenever ANY of the three values is still missing. The previous guard tested
        # `value.get('availability')` un-negated, so a page with a price but no availability
        # never reached this fallback.
        if not value.get('price') or not value.get('availability') or not value.get('currency'):
            logger.debug(f"Alternatively digging through OpenGraph properties for restock/price info..")
            jsonpath_expr = parse('$..properties')

            for match in jsonpath_expr.find(data):
                if not value.get('price'):
                    value['price'] = _search_prop_by_value([match.value], "price:amount")
                if not value.get('availability'):
                    value['availability'] = _search_prop_by_value([match.value], "product:availability")
                if not value.get('currency'):
                    value['currency'] = _search_prop_by_value([match.value], "price:currency")

    logger.trace(f"Processed with Extruct in {time.time()-now:.3f}s")

    return Restock(value)
def is_between(number, lower=None, upper=None):
    """
    Tell whether a number lies within an optional, inclusive range.

    Parameters:
    number (float): The value being tested.
    lower (float or None): Inclusive lower bound, or None for no lower bound.
    upper (float or None): Inclusive upper bound, or None for no upper bound.

    Returns:
    bool: True when the number respects every bound that was given, False otherwise.
    """
    # Reject as soon as a bound that was actually supplied is violated
    if lower is not None and not lower <= number:
        return False
    if upper is not None and not number <= upper:
        return False
    return True
class perform_site_check(difference_detection_processor):
screenshot = None
xpath_data = None
def run_changedetection(self, uuid, skip_when_checksum_same=True):
# DeepCopy so we can be sure we don't accidently change anything by reference
@@ -29,7 +117,7 @@ class perform_site_check(difference_detection_processor):
raise Exception("Watch no longer exists.")
# Unset any existing notification error
update_obj = {'last_notification_error': False, 'last_error': False}
update_obj = {'last_notification_error': False, 'last_error': False, 'restock': None}
self.screenshot = self.fetcher.screenshot
self.xpath_data = self.fetcher.xpath_data
@@ -38,29 +126,91 @@ class perform_site_check(difference_detection_processor):
update_obj['content_type'] = self.fetcher.headers.get('Content-Type', '')
update_obj["last_check_status"] = self.fetcher.get_last_status_code()
# Main detection method
fetched_md5 = None
if self.fetcher.instock_data:
fetched_md5 = hashlib.md5(self.fetcher.instock_data.encode('utf-8')).hexdigest()
# 'Possibly in stock' comes from stock-not-in-stock.js when no string found above the fold.
update_obj["in_stock"] = True if self.fetcher.instock_data == 'Possibly in stock' else False
logger.debug(f"Watch UUID {uuid} restock check returned '{self.fetcher.instock_data}' from JS scraper.")
else:
itemprop_availability = get_itemprop_availability(html_content=self.fetcher.content)
# Something valid in get_itemprop_availability() by scraping metadata ?
if itemprop_availability.get('price') or itemprop_availability.get('availability'):
# Store for other usage
update_obj['restock'] = itemprop_availability
if itemprop_availability.get('availability'):
# @todo: Configurable?
if any(substring.lower() in itemprop_availability['availability'].lower() for substring in [
'instock',
'instoreonly',
'limitedavailability',
'onlineonly',
'presale']
):
update_obj['restock']['in_stock'] = True
else:
update_obj['restock']['in_stock'] = False
# Used for the change detection, we store the real data separately, in the future this can implement some min,max threshold
# @todo if price is None?
self.fetcher.instock_data = f"{itemprop_availability.get('availability')} - {itemprop_availability.get('price')}"
elif self.fetcher.instock_data:
# 'Possibly in stock' comes from stock-not-in-stock.js when no string found above in the metadata of the HTML
update_obj['restock'] = Restock({'in_stock': True if self.fetcher.instock_data == 'Possibly in stock' else False})
# @todo scrape price somehow
logger.debug(
f"Restock - using scraped browserdata - Watch UUID {uuid} restock check returned '{self.fetcher.instock_data}' from JS scraper.")
if not self.fetcher.instock_data:
raise UnableToExtractRestockData(status_code=self.fetcher.status_code)
# Main detection method
fetched_md5 = hashlib.md5(self.fetcher.instock_data.encode('utf-8')).hexdigest()
# The main thing that all this at the moment comes down to :)
changed_detected = False
logger.debug(f"Watch UUID {uuid} restock check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")
if watch.get('previous_md5') and watch.get('previous_md5') != fetched_md5:
# out of stock -> back in stock only?
if watch.get('restock') and watch['restock'].get('in_stock') != update_obj['restock'].get('in_stock'):
# Yes if we only care about it going to instock, AND we are in stock
if watch.get('in_stock_only') and update_obj["in_stock"]:
if watch.get('in_stock_only') and update_obj['restock']['in_stock']:
changed_detected = True
if not watch.get('in_stock_only'):
# All cases
changed_detected = True
if watch.get('follow_price_changes') and watch.get('restock') and update_obj.get('restock') and update_obj['restock'].get('price'):
price = float(update_obj['restock'].get('price'))
# Default to current price if no previous price found
previous_price = float(watch['restock'].get('price', price))
# It was different, but negate it further down
if price != previous_price:
changed_detected = True
# Minimum/maximum price limit
if update_obj.get('restock') and update_obj['restock'].get('price'):
logger.debug(
f"{uuid} - Change was detected, 'price_change_max' is '{watch.get('price_change_max', '')}' 'price_change_min' is '{watch.get('price_change_min', '')}', price from website is '{update_obj['restock'].get('price', '')}'.")
if update_obj['restock'].get('price'):
min_limit = float(watch.get('price_change_min')) if watch.get('price_change_min') else None
max_limit = float(watch.get('price_change_max')) if watch.get('price_change_max') else None
price = float(update_obj['restock'].get('price'))
logger.debug(f"{uuid} after float conversion - Min limit: '{min_limit}' Max limit: '{max_limit}' Price: '{price}'")
if min_limit or max_limit:
if is_between(number=price, lower=min_limit, upper=max_limit):
if changed_detected:
logger.debug(f"{uuid} Override change-detected to FALSE because price was inside threshold")
changed_detected = False
if changed_detected and watch.get('price_change_threshold_percent'):
pc = float(watch.get('price_change_threshold_percent'))
change = abs((price - previous_price) / previous_price * 100)
if change and change <= pc:
logger.debug(f"{uuid} Override change-detected to FALSE because % threshold ({pc}%) was {change:.3f}%")
changed_detected = False
else:
logger.debug(f"{uuid} Price change was {change:.3f}% , (threshold {pc}%)")
# Always record the new checksum
update_obj["previous_md5"] = fetched_md5
return changed_detected, update_obj, self.fetcher.instock_data.encode('utf-8').strip()

View File

@@ -1,8 +1,8 @@
function toggleOpacity(checkboxSelector, fieldSelector) {
function toggleOpacity(checkboxSelector, fieldSelector, inverted) {
const checkbox = document.querySelector(checkboxSelector);
const fields = document.querySelectorAll(fieldSelector);
function updateOpacity() {
const opacityValue = checkbox.checked ? 0.6 : 1;
const opacityValue = !checkbox.checked ? (inverted ? 0.6 : 1) : (inverted ? 1 : 0.6);
fields.forEach(field => {
field.style.opacity = opacityValue;
});
@@ -25,6 +25,8 @@ $(document).ready(function () {
$('#notification-tokens-info').toggle();
});
toggleOpacity('#time_between_check_use_default', '#time_between_check');
toggleOpacity('#time_between_check_use_default', '#time_between_check', false);
});

View File

@@ -186,12 +186,17 @@ code {
}
}
.watch-tag-list {
color: var(--color-white);
.inline-tag {
white-space: nowrap;
background: var(--color-text-watch-tag-list);
border-radius: 5px;
padding: 2px 5px;
margin-right: 4px;
}
.watch-tag-list {
color: var(--color-white);
background: var(--color-text-watch-tag-list);
@extend .inline-tag;
}
.box {
@@ -1056,9 +1061,8 @@ ul {
.tracking-ldjson-price-data {
background-color: var(--color-background-button-green);
color: #000;
padding: 3px;
border-radius: 3px;
white-space: nowrap;
opacity: 0.6;
@extend .inline-tag;
}
.ldjson-price-track-offer {
@@ -1104,9 +1108,12 @@ ul {
background-color: var(--color-background-button-cancel);
color: #777;
}
padding: 3px;
border-radius: 3px;
white-space: nowrap;
&.error {
background-color: var(--color-background-button-error);
color: #fff;
opacity: 0.7;
}
@extend .inline-tag;
}
#chrome-extension-link {

View File

@@ -531,12 +531,15 @@ code {
content: url();
margin: 0 3px 0 5px; }
.inline-tag, .watch-tag-list, .tracking-ldjson-price-data, .restock-label {
white-space: nowrap;
border-radius: 5px;
padding: 2px 5px;
margin-right: 4px; }
.watch-tag-list {
color: var(--color-white);
white-space: nowrap;
background: var(--color-text-watch-tag-list);
border-radius: 5px;
padding: 2px 5px; }
background: var(--color-text-watch-tag-list); }
.box {
max-width: 80%;
@@ -1152,9 +1155,7 @@ ul {
.tracking-ldjson-price-data {
background-color: var(--color-background-button-green);
color: #000;
padding: 3px;
border-radius: 3px;
white-space: nowrap; }
opacity: 0.6; }
.ldjson-price-track-offer {
font-weight: bold;
@@ -1179,16 +1180,18 @@ ul {
#quick-watch-processor-type ul li > * {
display: inline-block; }
.restock-label {
padding: 3px;
border-radius: 3px;
white-space: nowrap; }
.restock-label.in-stock {
background-color: var(--color-background-button-green);
color: #fff; }
.restock-label.not-in-stock {
background-color: var(--color-background-button-cancel);
color: #777; }
.restock-label.in-stock {
background-color: var(--color-background-button-green);
color: #fff; }
.restock-label.not-in-stock {
background-color: var(--color-background-button-cancel);
color: #777; }
.restock-label.error {
background-color: var(--color-background-button-error);
color: #fff;
opacity: 0.7; }
#chrome-extension-link {
padding: 9px;

View File

@@ -242,7 +242,7 @@ class ChangeDetectionStore:
# Remove a watch's data but keep the entry (URL etc)
def clear_watch_history(self, uuid):
import pathlib
from .model import Restock
self.__data['watching'][uuid].update({
'browser_steps_last_error_step' : None,
'check_count': 0,
@@ -257,6 +257,7 @@ class ChangeDetectionStore:
'previous_md5_before_filters': False,
'remote_server_reply': None,
'track_ldjson_price_data': None,
'restock': Restock()
})
# JSON Data, Screenshots, Textfiles (history index and snapshots), HTML in the future etc
@@ -622,7 +623,8 @@ class ChangeDetectionStore:
# Eventually almost everything todo with a watch will apply as a Tag
# So we use the same model as a Watch
with self.lock:
new_tag = Watch.model(datastore_path=self.datastore_path, default={
from .model import Tag
new_tag = Tag.model(datastore_path=self.datastore_path, default={
'title': name.strip(),
'date_created': int(time.time())
})
@@ -661,6 +663,12 @@ class ChangeDetectionStore:
return next((v for v in tags if v.get('title', '').lower() == tag_name.lower()),
None)
def any_watches_have_processor_by_name(self, processor_name):
    """Return True if at least one stored watch has ``processor`` equal to ``processor_name``."""
    return any(
        entry.get('processor') == processor_name
        for entry in self.data['watching'].values()
    )
def get_updates_available(self):
import inspect
updates_available = []

View File

@@ -16,7 +16,7 @@
const email_notification_prefix=JSON.parse('{{ emailprefix|tojson }}');
{% endif %}
const notification_base_url="{{url_for('ajax_callback_send_notification_test', watch_uuid=uuid)}}";
const playwright_enabled={% if playwright_enabled %} true {% else %} false {% endif %};
const playwright_enabled={% if playwright_enabled %}true{% else %}false{% endif %};
const recheck_proxy_start_url="{{url_for('check_proxies.start_check', uuid=uuid)}}";
const proxy_recheck_status_url="{{url_for('check_proxies.get_recheck_status', uuid=uuid)}}";
const screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid)}}";
@@ -41,18 +41,16 @@
<ul>
<li class="tab" id=""><a href="#general">General</a></li>
<li class="tab"><a href="#request">Request</a></li>
{% if extra_tab_content %}
<li class="tab"><a href="#extras_tab">{{ extra_tab_content }}</a></li>
{% endif %}
{% if playwright_enabled %}
<li class="tab"><a id="browsersteps-tab" href="#browser-steps">Browser Steps</a></li>
{% endif %}
{% if watch['processor'] == 'text_json_diff' %}
<li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li>
<li class="tab"><a href="#filters-and-triggers">Filters &amp; Triggers</a></li>
{% endif %}
{% if watch['processor'] == 'restock_diff' %}
<li class="tab"><a href="#restock">Restock Detection</a></li>
{% endif %}
<li class="tab"><a href="#notifications">Notifications</a></li>
<li class="tab"><a href="#stats">Stats</a></li>
</ul>
@@ -72,7 +70,7 @@
<span class="pure-form-message-inline">
{% if watch['processor'] == 'text_json_diff' %}
Current mode: <strong>Webpage Text/HTML, JSON and PDF changes.</strong><br>
<a href="{{url_for('edit_page', uuid=uuid)}}?switch_processor=restock_diff" class="pure-button button-xsmall">Switch to re-stock detection mode.</a>
<a href="{{url_for('edit_page', uuid=uuid)}}?switch_processor=restock_diff" class="pure-button button-xsmall">Switch to re-stock & Price detection mode for single product pages</a>
{% else %}
Current mode: <strong>Re-stock detection.</strong><br>
<a href="{{url_for('edit_page', uuid=uuid)}}?switch_processor=text_json_diff" class="pure-button button-xsmall">Switch to Webpage Text/HTML, JSON and PDF changes mode.</a>
@@ -413,18 +411,12 @@ Unavailable") }}
</div>
</div>
{% endif %}
{% if watch['processor'] == 'restock_diff' %}
<div class="tab-pane-inner" id="restock">
<fieldset>
<div class="pure-control-group">
{{ render_checkbox_field(form.in_stock_only) }}
<span class="pure-form-message-inline">Only trigger notifications when page changes from <strong>out of stock</strong> to <strong>back in stock</strong></span>
</div>
</fieldset>
{# rendered sub Template #}
{% if extra_form_content %}
<div class="tab-pane-inner" id="extras_tab">
{{ extra_form_content|safe }}
</div>
{% endif %}
{% endif %}
{% if watch['processor'] == 'text_json_diff' %}
<div class="tab-pane-inner visual-selector-ui" id="visualselector">
<img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}" alt="New beta functionality">

View File

@@ -59,6 +59,11 @@
{% set sort_order = sort_order or 'asc' %}
{% set sort_attribute = sort_attribute or 'last_changed' %}
{% set pagination_page = request.args.get('page', 0) %}
{% set cols_required = 6 %}
{% set any_has_restock_price_processor = datastore.any_watches_have_processor_by_name("restock_diff") %}
{% if any_has_restock_price_processor %}
{% set cols_required = cols_required + 1 %}
{% endif %}
<div id="watch-table-wrapper">
@@ -70,6 +75,9 @@
<th><input style="vertical-align: middle" type="checkbox" id="check-all" > <a class="{{ 'active '+link_order if sort_attribute == 'date_created' else 'inactive' }}" href="{{url_for('index', sort='date_created', order=link_order, tag=active_tag_uuid)}}"># <span class='arrow {{link_order}}'></span></a></th>
<th></th>
<th><a class="{{ 'active '+link_order if sort_attribute == 'label' else 'inactive' }}" href="{{url_for('index', sort='label', order=link_order, tag=active_tag_uuid)}}">Website <span class='arrow {{link_order}}'></span></a></th>
{% if any_has_restock_price_processor %}
<th>Restock &amp; Price</th>
{% endif %}
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_checked' else 'inactive' }}" href="{{url_for('index', sort='last_checked', order=link_order, tag=active_tag_uuid)}}">Last Checked <span class='arrow {{link_order}}'></span></a></th>
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_changed' else 'inactive' }}" href="{{url_for('index', sort='last_changed', order=link_order, tag=active_tag_uuid)}}">Last Changed <span class='arrow {{link_order}}'></span></a></th>
<th></th>
@@ -78,7 +86,7 @@
<tbody>
{% if not watches|length %}
<tr>
<td colspan="6" style="text-wrap: wrap;">No website watches configured, please add a URL in the box above, or <a href="{{ url_for('import_page')}}" >import a list</a>.</td>
<td colspan="{{ cols_required }}" style="text-wrap: wrap;">No website watches configured, please add a URL in the box above, or <a href="{{ url_for('import_page')}}" >import a list</a>.</td>
</tr>
{% endif %}
{% for watch in (watches|sort(attribute=sort_attribute, reverse=sort_order == 'asc'))|pagination_slice(skip=pagination.skip) %}
@@ -91,6 +99,7 @@
{% if watch.last_notification_error is defined and watch.last_notification_error != False %}error{% endif %}
{% if watch.paused is defined and watch.paused != False %}paused{% endif %}
{% if is_unviewed %}unviewed{% endif %}
{% if watch.has_restock_info %}has-restock-info {% if watch['restock']['in_stock'] %}in-stock{% else %}not-in-stock{% endif %}{% endif %}
{% if watch.uuid in queued_uuids %}queued{% endif %}">
<td class="inline checkbox-uuid" ><input name="uuids" type="checkbox" value="{{ watch.uuid}} " > <span>{{ loop.index+pagination.skip }}</span></td>
<td class="inline watch-controls">
@@ -135,30 +144,40 @@
{% if watch['processor'] == 'text_json_diff' %}
{% if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] %}
<div class="ldjson-price-track-offer">Embedded price data detected, follow only price data? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
{% endif %}
{% if watch['track_ldjson_price_data'] == 'accepted' %}
{% endif %}
{% if watch['processor'] == 'restock_diff' %}
<span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}" class="status-icon price-follow-tag-icon" > Price</span>
{% endif %}
{% endif %}
{% if watch['processor'] == 'restock_diff' %}
<span class="restock-label {{'in-stock' if watch['in_stock'] else 'not-in-stock' }}" title="detecting restock conditions">
<!-- maybe some object watch['processor'][restock_diff] or.. -->
{% if watch['last_checked'] and watch['in_stock'] != None %}
{% if watch['in_stock'] %} In stock {% else %} Not in stock {% endif %}
{% else %}
Not yet checked
{% endif %}
</span>
{% endif %}
{% for watch_tag_uuid, watch_tag in datastore.get_all_tags_for_watch(watch['uuid']).items() %}
<span class="watch-tag-list">{{ watch_tag.title }}</span>
{% endfor %}
</td>
{% if any_has_restock_price_processor %}
<td class="restock-and-price">
{% if watch['processor'] == 'restock_diff' %}
{% if watch.get('restock') and watch['restock']['in_stock'] != None %}
<span class="restock-label {{'in-stock' if watch['restock']['in_stock'] else 'not-in-stock' }}" title="Detecting restock and price">
<!-- maybe some object watch['processor'][restock_diff] or.. -->
{% if watch['restock']['in_stock'] %} In stock {% else %} Not in stock {% endif %}
</span>
{% endif %}
{% if watch.get('restock') and watch['restock']['price'] != None %}
{% if watch['restock']['price'] != None %}
<span class="restock-label price {{'in-stock' if watch['restock']['in_stock'] else 'not-in-stock' }}" title="Price">
{{ watch['restock']['price'] }} {{ watch['restock']['currency'] }}
</span>
{% endif %}
{% elif not watch.has_restock_info %}
<span class="restock-label error">No information</span>
{% endif %}
{% endif %}
</td>
{% endif %}
<td class="last-checked" data-timestamp="{{ watch.last_checked }}">{{watch|format_last_checked_time|safe}}</td>
<td class="last-changed" data-timestamp="{{ watch.last_changed }}">{% if watch.history_n >=2 and watch.last_changed >0 %}
{{watch.last_changed|format_timestamp_timeago}}

View File

@@ -1,235 +0,0 @@
#!/usr/bin/python3
import time
from flask import url_for
from .util import live_server_setup, extract_UUID_from_client, extract_api_key_from_UI, wait_for_all_checks
def set_response_with_ldjson():
    """Write an HTML page containing an LD+JSON Product/AggregateOffer block to the test endpoint file."""
    page_markup = """<html>
<body>
Some initial text<br>
<p>Which is across multiple lines</p>
<br>
So let's see what happens. <br>
<div class="sametext">Some text thats the same</div>
<div class="changetext">Some text that will change</div>
<script type="application/ld+json">
{
"@context":"https://schema.org/",
"@type":"Product",
"@id":"https://www.some-virtual-phone-shop.com/celular-iphone-14/p",
"name":"Celular Iphone 14 Pro Max 256Gb E Sim A16 Bionic",
"brand":{
"@type":"Brand",
"name":"APPLE"
},
"image":"https://www.some-virtual-phone-shop.com/15509426/image.jpg",
"description":"You dont need it",
"mpn":"111111",
"sku":"22222",
"Offers":{
"@type":"AggregateOffer",
"lowPrice":8097000,
"highPrice":8099900,
"priceCurrency":"COP",
"offers":[
{
"@type":"Offer",
"price":8097000,
"priceCurrency":"COP",
"availability":"http://schema.org/InStock",
"sku":"102375961",
"itemCondition":"http://schema.org/NewCondition",
"seller":{
"@type":"Organization",
"name":"ajax"
}
}
],
"offerCount":1
}
}
</script>
</body>
</html>
"""

    # The file is overwritten on every call.
    with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
        endpoint_file.write(page_markup)

    return None
def set_response_without_ldjson():
    """Write a plain HTML page with no LD+JSON block to the test endpoint file."""
    page_markup = """<html>
<body>
Some initial text<br>
<p>Which is across multiple lines</p>
<br>
So let's see what happens. <br>
<div class="sametext">Some text thats the same</div>
<div class="changetext">Some text that will change</div>
</body>
</html>
"""

    # The file is overwritten on every call.
    with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
        endpoint_file.write(page_markup)

    return None
def test_setup(client, live_server):
    """Run the shared live-server setup before the other tests in this module."""
    live_server_setup(live_server)
# actually only really used by the distill.io importer, but could be handy too
def test_check_ldjson_price_autodetect(client, live_server):
    """
    Check the "follow embedded price data" offer on the watch overview.

    Part one serves a page with LD+JSON price data, expects the offer to be
    shown, accepts it, and expects the latest snapshot to hold only the
    LD+JSON content. Part two serves a page without LD+JSON and expects no
    offer to be shown.
    """
    set_response_with_ldjson()

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in res.data
    wait_for_all_checks(client)

    # Should get a notice that it's available
    res = client.get(url_for("index"))
    assert b'ldjson-price-track-offer' in res.data

    # Accept it
    uuid = extract_UUID_from_client(client)
    time.sleep(1)
    # follow_redirects belongs to client.get(); handed to url_for() it would
    # only be appended to the URL as a query argument and never followed.
    client.get(url_for('price_data_follower.accept', uuid=uuid), follow_redirects=True)
    wait_for_all_checks(client)

    # Trigger a check
    time.sleep(1)
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    # Offer should be gone
    res = client.get(url_for("index"))
    assert b'Embedded price data' not in res.data
    assert b'tracking-ldjson-price-data' in res.data

    # and last snapshot (via API) should be just the price
    api_key = extract_api_key_from_UI(client)
    res = client.get(
        url_for("watchsinglehistory", uuid=uuid, timestamp='latest'),
        headers={'x-api-key': api_key},
    )

    # Should see this (dont know where the whitespace came from)
    assert b'"highPrice": 8099900' in res.data
    # And not this cause its not the ld-json
    assert b"So let's see what happens" not in res.data

    client.get(url_for("form_delete", uuid="all"), follow_redirects=True)

    ##########################################################################################
    # And we shouldnt see the offer
    set_response_without_ldjson()

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in res.data
    wait_for_all_checks(client)

    res = client.get(url_for("index"))
    assert b'ldjson-price-track-offer' not in res.data

    ##########################################################################################
    client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
def _test_runner_check_bad_format_ignored(live_server, client, has_ldjson_price_data):
    """
    Import the test endpoint, wait for the check, and verify every watch.

    Each stored watch must have ``last_error`` equal to False and
    ``has_ldjson_price_data`` equal to the expected value passed in.
    All watches are deleted afterwards.
    """
    endpoint_url = url_for('test_endpoint', _external=True)
    import_response = client.post(
        url_for("import_page"),
        data={"urls": endpoint_url},
        follow_redirects=True
    )
    assert b"1 Imported" in import_response.data
    wait_for_all_checks(client)

    datastore = client.application.config.get('DATASTORE')
    for watch in datastore.data['watching'].values():
        assert watch.get('last_error') == False
        assert watch.get('has_ldjson_price_data') == has_ldjson_price_data

    ##########################################################################################
    client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
def test_bad_ldjson_is_correctly_ignored(client, live_server):
    """
    Serve two LD+JSON fixtures and check the price-data flag for each.

    The first fixture has an ``offers`` key and is expected to be flagged as
    having price data; the second uses ``BrokenOffers`` and is expected not
    to be flagged.
    """
    #live_server_setup(live_server)
    markup_with_offers = """
<html>
<head>
<script type="application/ld+json">
{
"@context": "http://schema.org",
"@type": ["Product", "SubType"],
"name": "My test product",
"description": "",
"offers": {
"note" : "You can see the case-insensitive OffERS key, it should work",
"@type": "Offer",
"offeredBy": {
"@type": "Organization",
"name":"Person",
"telephone":"+1 999 999 999"
},
"price": "1",
"priceCurrency": "EUR",
"url": "/some/url"
}
}
</script>
</head>
<body>
<div class="yes">Some extra stuff</div>
</body></html>
"""
    markup_with_broken_offers = """
<html>
<head>
<script type="application/ld+json">
{
"@context": "http://schema.org",
"@type": ["Product", "SubType"],
"name": "My test product",
"description": "",
"BrokenOffers": {
"@type": "Offer",
"offeredBy": {
"@type": "Organization",
"name":"Person",
"telephone":"+1 999 999 999"
},
"price": "1",
"priceCurrency": "EUR",
"url": "/some/url"
}
}
</script>
</head>
<body>
<div class="yes">Some extra stuff</div>
</body></html>
"""

    # Same write-then-verify sequence for each fixture, in the original order.
    scenarios = (
        (markup_with_offers, True),
        (markup_with_broken_offers, False),
    )
    for markup, expect_price_data in scenarios:
        with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
            endpoint_file.write(markup)
        _test_runner_check_bad_format_ignored(live_server=live_server, client=client, has_ldjson_price_data=expect_price_data)

View File

@@ -10,6 +10,7 @@ def test_setup(live_server):
# Hard to just add more live server URLs when one test is already running (I think)
# So we add our test here (was in a different file)
def test_headers_in_request(client, live_server):
#live_server_setup(live_server)
# Add our URL to the import page
test_url = url_for('test_headers', _external=True)
@@ -378,11 +379,17 @@ def test_headers_textfile_in_request(client, live_server):
with open('test-datastore/' + extract_UUID_from_client(client) + '/headers.txt', 'w') as f:
f.write("watch-header: nice")
wait_for_all_checks(client)
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
# Give the thread time to pick it up, this actually is not super reliable and pytest can terminate before the check is run
wait_for_all_checks(client)
# WARNING - pytest and 'wait_for_all_checks' shuts down before it has actually stopped processing when using pyppeteer fetcher
# so adding more time here
if os.getenv('FAST_PUPPETEER_CHROME_FETCHER'):
time.sleep(6)
res = client.get(url_for("edit_page", uuid="first"))
assert b"Extra headers file found and will be added to this watch" in res.data

View File

@@ -0,0 +1,258 @@
#!/usr/bin/python3
from flask import url_for
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
# Markup fixtures that describe an in-stock product. Each entry is one
# complete snippet; '$$PRICE$$' is substituted by set_original_response().
instock_props = [
    # LD+JSON with non-standard list of 'type' https://github.com/dgtlmoon/changedetection.io/issues/1833
    '<script type=\'application/ld+json\'>{"@context": "http://schema.org","@type": ["Product", "SubType"],"name": "My test product","description":"","Offers": { "@type": "Offer", "offeredBy": { "@type": "Organization", "name":"Person", "telephone":"+1 999 999 999" }, "price": $$PRICE$$, "priceCurrency": "EUR", "url": "/some/url", "availability": "http://schema.org/InStock"} }</script>',
    # LD JSON
    '<script type=\'application/ld+json\'>[{"@context":"http://schema.org","@type":"WebSite","name":"partsDíly.cz","description":"Nejlevnější autodlíly.","url":"https://parts.com/?id=3038915","potentialAction":{"@type":"SearchAction","target":"https://parts.com/vyhledavani?search={query}","query-input":{"@type":"PropertyValueSpecification","valueRequired":"http://schema.org/True","valueName":"query"}},"publisher":{"@context":"http://schema.org","@type":"Organization","name":"Car Díly.cz","url":"https://carparts.com/","logo":"https://parts.com/77026_3195959275.png","sameAs":["https://twitter.com/parts","https://www.instagram.com/parts/?hl=cs"]},"sameAs":["https://twitter.com/parts","https://www.instagram.com/parts/"]},{"@context":"http://schema.org","@type":"BreadcrumbList","itemListElement":[{"@type":"ListItem","position":0,"item":{"@id":"/autodily","name":"Autodíly pro osobní vozy"}},{"@type":"ListItem","position":1,"item":{"@id":"/autodily/dodge","name":"DODGE"}},{"@type":"ListItem","position":2,"item":{"@id":"https://parts.com/280kw","name":"parts parts • 100 kW"}}]},{"@context":"http://schema.org","@type":"Product","name":"Olejový filtr K&N Filters","description":"","mpn":"xxx11","brand":"K&N Filters","image":"https://parts.com/images/1600/c8fe1f1428021f4fe17a39297686178b04cba885.jpg","offers":{"@context":"http://schema.org","@type":"Offer","price":$$PRICE$$,"priceCurrency":"CZK","url":"https://parts.com/filters/hp","availability":"http://schema.org/InStock"}}]</script>',
    '<script id="product-jsonld" type="application/ld+json">{"@context":"https://schema.org","@type":"Product","brand":{"@type":"Brand","name":"Ubiquiti"},"name":"UniFi Express","sku":"UX","description":"Impressively compact UniFi Cloud Gateway and WiFi 6 access point that runs UniFi Network. Powers an entire network or simply meshes as an access point.","url":"https://store.ui.com/us/en/products/ux","image":{"@type":"ImageObject","url":"https://cdn.ecomm.ui.com/products/4ed25b4c-db92-4b98-bbf3-b0989f007c0e/123417a2-895e-49c7-ba04-b6cd8f6acc03.png","width":"1500","height":"1500"},"offers":{"@type":"Offer","availability":"https://schema.org/InStock","priceSpecification":{"@type":"PriceSpecification","price":$$PRICE$$,"priceCurrency":"USD","valueAddedTaxIncluded":false}}}</script>',
    # The trailing comma below matters: without it Python concatenates this
    # literal with the Microdata one and the two fixtures become a single entry.
    '<script id="product-schema" type="application/ld+json">{"@context": "https://schema.org","@type": "Product","itemCondition": "https://schema.org/NewCondition","image": "//1.com/hmgo","name": "Polo MuscleFit","color": "Beige","description": "Polo","sku": "0957102010","brand": {"@type": "Brand","name": "H&M"},"category": {"@type": "Thing","name": "Polo"},"offers": [{"@type": "Offer","url": "https:/www2.xxxxxx.com/fr_fr/productpage.0957102010.html","priceCurrency": "EUR","price": $$PRICE$$,"availability": "http://schema.org/InStock","seller": { "@type": "Organization", "name": "H&amp;M"}}]}</script>',
    # Microdata
    '<div itemscope itemtype="https://schema.org/Product"><h1 itemprop="name">Example Product</h1><p itemprop="description">This is a sample product description.</p><div itemprop="offers" itemscope itemtype="https://schema.org/Offer"><p>Price: <span itemprop="price">$$$PRICE$$</span></p><link itemprop="availability" href="https://schema.org/InStock" /></div></div>',
]

# Markup fixtures that describe an out-of-stock product.
out_of_stock_props = [
    # out of stock AND contains multiples
    '<script type="application/ld+json">{"@context":"http://schema.org","@type":"WebSite","url":"https://www.medimops.de/","potentialAction":{"@type":"SearchAction","target":"https://www.medimops.de/produkte-C0/?fcIsSearch=1&searchparam={searchparam}","query-input":"required name=searchparam"}}</script><script type="application/ld+json">{"@context":"http://schema.org","@type":"Product","name":"Horsetrader: Robert Sangster and the Rise and Fall of the Sport of Kings","image":"https://images2.medimops.eu/product/43a982/M00002551322-large.jpg","productID":"isbn:9780002551328","gtin13":"9780002551328","category":"Livres en langue étrangère","offers":{"@type":"Offer","priceCurrency":"EUR","price":$$PRICE$$,"itemCondition":"UsedCondition","availability":"OutOfStock"},"brand":{"@type":"Thing","name":"Patrick Robinson","url":"https://www.momox-shop.fr/,patrick-robinson/"}}</script>',
]
def set_original_response(props_markup='', price="121.95"):
    """
    Write the HTML page served by the test endpoint.

    Every ``$$PRICE$$`` placeholder in ``props_markup`` is replaced with
    ``price``; the page also shows the price in a plain ``<div>``.
    """
    rendered_props = props_markup.replace('$$PRICE$$', price)
    page_markup = f"""<html>
<body>
Some initial text<br>
<p>Which is across multiple lines</p>
<br>
So let's see what happens. <br>
<div>price: ${price}</div>
{rendered_props}
</body>
</html>
"""

    # The file is overwritten on every call.
    with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
        endpoint_file.write(page_markup)

    return None
def test_setup(client, live_server):
    """Run the shared live-server setup before the other tests in this module."""
    live_server_setup(live_server)
def test_restock_itemprop_basic(client, live_server):
    """
    Check the stock CSS classes on the overview for every markup fixture.

    Each in-stock fixture must produce 'has-restock-info in-stock' and never
    'has-restock-info not-in-stock'; each out-of-stock fixture must produce
    'has-restock-info not-in-stock'. All watches are deleted after each fixture.
    """
    #live_server_setup(live_server)
    test_url = url_for('test_endpoint', _external=True)

    def overview_after_watching(markup, tags):
        # Serve the fixture, add a restock watch for it, and return the index page body.
        set_original_response(props_markup=markup)
        client.post(
            url_for("form_quick_watch_add"),
            data={"url": test_url, "tags": tags, 'processor': 'restock_diff'},
            follow_redirects=True
        )
        wait_for_all_checks(client)
        return client.get(url_for("index")).data

    def delete_all_watches():
        response = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
        assert b'Deleted' in response.data

    for markup in instock_props:
        overview = overview_after_watching(markup, 'restock tests')
        assert b'has-restock-info in-stock' in overview
        assert b'has-restock-info not-in-stock' not in overview
        delete_all_watches()

    for markup in out_of_stock_props:
        overview = overview_after_watching(markup, '')
        assert b'has-restock-info not-in-stock' in overview
        delete_all_watches()
def test_itemprop_price_change(client, live_server):
    """
    Check that a price change marks the watch unviewed unless price following is off.

    With the defaults, a new price shows on the overview and the watch is
    'unviewed'. After posting an empty ``follow_price_changes`` the new price
    is still shown but the watch is not marked 'unviewed'.
    """
    #live_server_setup(live_server)
    test_url = url_for('test_endpoint', _external=True)
    first_fixture = instock_props[0]

    def recheck_and_get_overview():
        # Queue a recheck, wait for it to finish, then return the index page body.
        client.get(url_for("form_watch_checknow"), follow_redirects=True)
        wait_for_all_checks(client)
        return client.get(url_for("index")).data

    set_original_response(props_markup=first_fixture, price="190.95")
    client.post(
        url_for("form_quick_watch_add"),
        data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'},
        follow_redirects=True
    )

    # A change in price, should trigger a change by default
    wait_for_all_checks(client)
    overview = client.get(url_for("index")).data
    assert b'190.95' in overview

    # basic price change, look for notification
    set_original_response(props_markup=first_fixture, price='180.45')
    overview = recheck_and_get_overview()
    assert b'180.45' in overview
    assert b'unviewed' in overview
    client.get(url_for("mark_all_viewed"), follow_redirects=True)

    # turning off price change trigger, but it should show the new price, with no change notification
    set_original_response(props_markup=first_fixture, price='120.45')
    edit_response = client.post(
        url_for("edit_page", uuid="first"),
        data={"follow_price_changes": "", "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests"},
        follow_redirects=True
    )
    assert b"Updated watch." in edit_response.data

    overview = recheck_and_get_overview()
    assert b'120.45' in overview
    assert b'unviewed' not in overview

    delete_response = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in delete_response.data
def test_itemprop_price_minmax_limit(client, live_server):
    """Check that only prices outside the configured min/max window flag a change."""
    # live_server_setup(live_server) is deliberately left disabled here.

    # Start from an empty watch list.
    removed = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in removed.data

    endpoint_url = url_for('test_endpoint', _external=True)

    set_original_response(props_markup=instock_props[0], price="950.95")
    new_watch = {"url": endpoint_url, "tags": 'restock tests', 'processor': 'restock_diff'}
    client.post(
        url_for("form_quick_watch_add"),
        data=new_watch,
        follow_redirects=True,
    )

    # Let the initial check finish before changing the watch settings.
    wait_for_all_checks(client)

    limits_form = {
        "follow_price_changes": "y",
        "price_change_min": 900.0,
        "price_change_max": 1100.10,
        "url": endpoint_url,
        "tags": "",
        "headers": "",
        'fetch_backend': "html_requests",
    }
    saved = client.post(
        url_for("edit_page", uuid="first"),
        data=limits_form,
        follow_redirects=True,
    )
    assert b"Updated watch." in saved.data
    wait_for_all_checks(client)

    client.get(url_for("mark_all_viewed"))

    # Price moves inside the window (above min 900, below max 1100): no change expected.
    set_original_response(props_markup=instock_props[0], price='1000.45')
    client.get(url_for("form_watch_checknow"))
    wait_for_all_checks(client)
    overview = client.get(url_for("index"))
    assert b'1000.45' in overview.data
    assert b'unviewed' not in overview.data

    # Price drops BELOW the minimum (900): a change is expected.
    set_original_response(props_markup=instock_props[0], price='890.45')
    queued = client.get(url_for("form_watch_checknow"), follow_redirects=True)
    assert b'1 watches queued for rechecking.' in queued.data
    wait_for_all_checks(client)
    overview = client.get(url_for("index"))
    assert b'890.45' in overview.data
    assert b'unviewed' in overview.data
    client.get(url_for("mark_all_viewed"))

    # Price rises ABOVE the maximum (1100.10): a change is expected.
    set_original_response(props_markup=instock_props[0], price='1890.45')
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    overview = client.get(url_for("index"))
    assert b'1890.45' in overview.data
    assert b'unviewed' in overview.data

    # Clean up so no watch is left behind.
    removed = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in removed.data
def test_itemprop_percent_threshold(client, live_server):
    """Check that only price moves larger than the percent threshold flag a change."""
    # live_server_setup(live_server) is deliberately left disabled here.

    # Start from an empty watch list.
    removed = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in removed.data

    endpoint_url = url_for('test_endpoint', _external=True)

    set_original_response(props_markup=instock_props[0], price="950.95")
    new_watch = {"url": endpoint_url, "tags": 'restock tests', 'processor': 'restock_diff'}
    client.post(
        url_for("form_quick_watch_add"),
        data=new_watch,
        follow_redirects=True,
    )

    # Let the initial check finish before changing the watch settings.
    wait_for_all_checks(client)

    threshold_form = {
        "follow_price_changes": "y",
        "price_change_threshold_percent": 5.0,
        "url": endpoint_url,
        "tags": "",
        "headers": "",
        'fetch_backend': "html_requests",
    }
    saved = client.post(
        url_for("edit_page", uuid="first"),
        data=threshold_form,
        follow_redirects=True,
    )
    assert b"Updated watch." in saved.data
    wait_for_all_checks(client)

    # A small change must not trigger.
    set_original_response(props_markup=instock_props[0], price='960.45')
    client.get(url_for("form_watch_checknow"))
    wait_for_all_checks(client)
    overview = client.get(url_for("index"))
    assert b'960.45' in overview.data
    assert b'unviewed' not in overview.data

    # An INCREASE larger than the threshold must trigger.
    set_original_response(props_markup=instock_props[0], price='1960.45')
    client.get(url_for("form_watch_checknow"))
    wait_for_all_checks(client)
    overview = client.get(url_for("index"))
    assert b'1960.45' in overview.data
    assert b'unviewed' in overview.data

    # A small decrease must NOT trigger.
    client.get(url_for("mark_all_viewed"))
    set_original_response(props_markup=instock_props[0], price='1950.45')
    client.get(url_for("form_watch_checknow"))
    wait_for_all_checks(client)
    overview = client.get(url_for("index"))
    assert b'1950.45' in overview.data
    assert b'unviewed' not in overview.data

    # Clean up so no watch is left behind.
    removed = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in removed.data

View File

@@ -0,0 +1,21 @@
#!/usr/bin/python3
# run from dir above changedetectionio/ dir
# python3 -m unittest changedetectionio.tests.unit.test_restock_logic
import unittest
import os
from changedetectionio.processors import restock_diff
# mostly
class TestDiffBuilder(unittest.TestCase):
    """Unit tests for the range helper ``restock_diff.is_between``."""

    def test_logic(self):
        """Values inside the bounds are accepted; ``lower=None`` is expected to mean no lower bound."""
        # unittest assertions are used instead of bare ``assert`` so the checks
        # still run under ``python -O`` and failures are reported by the runner.
        self.assertTrue(
            restock_diff.is_between(number=10, lower=9, upper=11),
            "Between 9 and 11",
        )
        self.assertTrue(
            restock_diff.is_between(number=10, lower=0, upper=11),
            "Between 0 and 11",
        )
        self.assertTrue(
            restock_diff.is_between(number=10, lower=None, upper=11),
            "Between None and 11",
        )
        self.assertFalse(
            restock_diff.is_between(number=12, lower=None, upper=11),
            "12 is not between None and 11",
        )


if __name__ == '__main__':
    unittest.main()

View File

@@ -121,18 +121,21 @@ def extract_UUID_from_client(client):
return uuid.strip()
def wait_for_all_checks(client):
# Poll the index page until it no longer contains 'Checking now', giving up after 60 attempts.
# actually this is not entirely true, it can still be 'processing' but not in the queue
# Loop waiting until done..
attempt=0
# NOTE(review): this span comes from a diff hunk (-121,18 +121,21) rendered without +/- markers,
# so the sleep calls below may mix removed and added lines - confirm against the checked-in file.
time.sleep(0.1)
# because sub-second rechecks are problematic in testing, use lots of delays
time.sleep(1)
while attempt < 60:
time.sleep(1)
res = client.get(url_for("index"))
# Stop polling as soon as the watch list no longer reports a running check.
if not b'Checking now' in res.data:
break
logging.getLogger().info("Waiting for watch-list to not say 'Checking now'.. {}".format(attempt))
time.sleep(1)
attempt += 1
# NOTE(review): presumably a final settle delay after the loop - indentation is not visible here, verify.
time.sleep(1)
def live_server_setup(live_server):
@live_server.app.route('/test-random-content-endpoint')

View File

@@ -454,9 +454,12 @@ class update_worker(threading.Thread):
except UnableToExtractRestockData as e:
# Usually when fetcher.instock_data returns empty
logger.error(f"Exception (UnableToExtractRestockData) reached processing watch UUID: {uuid}")
logger.error(str(e))
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': f"Unable to extract restock data for this page unfortunately. (Got code {e.status_code} from server)"})
self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
self.datastore.update_watch(uuid=uuid,
update_obj={
'last_error': f"Unable to extract restock data for this page unfortunately. (Got code {e.status_code} from server), no embedded stock information was found and nothing interesting in the text, try using this watch with Chrome.",
}
)
process_changedetection_results = False
except Exception as e:
logger.error(f"Exception reached processing watch UUID: {uuid}")

View File

@@ -41,10 +41,8 @@ apprise~=1.8.0
# use v1.x due to https://github.com/eclipse/paho.mqtt.python/issues/814
paho-mqtt>=1.6.1,<2.0.0
# This mainly affects some ARM builds, which unlike the other builds ignores "ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1"
# so without this pinning, the newer versions on ARM will forcefully try to build rust, which results in "rust compiler not found"
# (introduced once apprise became a dep)
cryptography~=3.4
# Requires extra wheel for rPi
cryptography~=42.0.8
# Used for CSS filtering
beautifulsoup4
@@ -84,5 +82,9 @@ pytest-flask ~=1.2
jsonschema==4.17.3
loguru
# For scraping all possible metadata relating to products so we can do better restock detection
extruct
# Needed for > 3.10, https://github.com/microsoft/playwright-python/issues/2096
greenlet >= 3.0.3
greenlet >= 3.0.3