Compare commits

..

19 Commits

Author SHA1 Message Date
dgtlmoon
62e99d41be aiosmtpd only needed for test 2024-06-17 09:22:50 +02:00
dgtlmoon
cdf07983db plaintext fixes 2024-06-17 09:00:18 +02:00
dgtlmoon
27401b3202 oops 2024-06-17 08:41:35 +02:00
dgtlmoon
4baa9489c3 upgrade to asiosmtpd 2024-06-16 23:23:58 +02:00
dgtlmoon
503e67f33e maybe 2024-06-16 19:31:05 +02:00
dgtlmoon
f290d0e5fe oops 2024-06-16 19:19:11 +02:00
dgtlmoon
933d4ce886 skip pypuppeteer 2024-06-16 18:54:04 +02:00
dgtlmoon
50c7e1bf8c woops 2024-06-16 17:06:44 +02:00
dgtlmoon
73eb00a3e9 hmm 2024-06-16 17:00:01 +02:00
dgtlmoon
e75561b2f5 prove version 2024-06-16 16:54:10 +02:00
dgtlmoon
005ed20741 WIP 2024-06-16 16:50:30 +02:00
dgtlmoon
2b0f851cbc add greenlet 2024-06-16 16:43:49 +02:00
dgtlmoon
56aeefc53e woops 2024-06-16 16:39:38 +02:00
dgtlmoon
f7c7a3dbb8 add more debug 2024-06-16 16:11:28 +02:00
dgtlmoon
e5775dd68e WIP 2024-06-16 16:09:37 +02:00
dgtlmoon
97e4ae1194 WIP 2024-06-16 16:08:03 +02:00
dgtlmoon
de955a54bd woops 2024-06-16 16:05:29 +02:00
dgtlmoon
5037e66ec1 Test multiple python versions 2024-06-16 16:03:54 +02:00
dgtlmoon
db48cc64f5 split build from lint, try 3.10 as build arg 2024-06-16 15:55:51 +02:00
42 changed files with 293 additions and 382 deletions

View File

@@ -88,7 +88,7 @@ jobs:
- name: Build and push :dev
id: docker_build
if: ${{ github.ref }} == "refs/heads/master"
uses: docker/build-push-action@v6
uses: docker/build-push-action@v5
with:
context: ./
file: ./Dockerfile
@@ -106,7 +106,7 @@ jobs:
- name: Build and push :tag
id: docker_build_tag_release
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
uses: docker/build-push-action@v6
uses: docker/build-push-action@v5
with:
context: ./
file: ./Dockerfile

View File

@@ -51,7 +51,7 @@ jobs:
# Check we can still build under alpine/musl
- name: Test that the docker containers can build (musl via alpine check)
id: docker_build_musl
uses: docker/build-push-action@v6
uses: docker/build-push-action@v5
with:
context: ./
file: ./.github/test/Dockerfile-alpine
@@ -59,7 +59,7 @@ jobs:
- name: Test that the docker containers can build
id: docker_build
uses: docker/build-push-action@v6
uses: docker/build-push-action@v5
# https://github.com/docker/build-push-action#customizing
with:
context: ./

View File

@@ -3,9 +3,9 @@
# @NOTE! I would love to move to 3.11 but it breaks the async handler in changedetectionio/content_fetchers/puppeteer.py
# If you know how to fix it, please do! and test it for both 3.10 and 3.11
ARG PYTHON_VERSION=3.11
ARG PYTHON_VERSION=3.10
FROM python:${PYTHON_VERSION}-slim-bookworm AS builder
FROM python:${PYTHON_VERSION}-slim-bookworm as builder
# See `cryptography` pin comment in requirements.txt
ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1
@@ -26,8 +26,7 @@ WORKDIR /install
COPY requirements.txt /requirements.txt
# --extra-index-url https://www.piwheels.org/simple is for cryptography module to be prebuilt (or rustc etc needs to be installed)
RUN pip install --extra-index-url https://www.piwheels.org/simple --target=/dependencies -r /requirements.txt
RUN pip install --target=/dependencies -r /requirements.txt
# Playwright is an alternative to Selenium
# Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing

View File

@@ -2,7 +2,7 @@
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
__version__ = '0.45.25'
__version__ = '0.45.23'
from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError

View File

@@ -170,33 +170,23 @@ class WatchSingleHistory(Resource):
curl http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091/history/1677092977 -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json"
@apiName Get single snapshot content
@apiGroup Watch History
@apiParam {String} [html] Optional Set to =1 to return the last HTML (only stores last 2 snapshots, use `latest` as timestamp)
@apiSuccess (200) {String} OK
@apiSuccess (404) {String} ERR Not found
"""
watch = self.datastore.data['watching'].get(uuid)
if not watch:
abort(404, message=f"No watch exists with the UUID of {uuid}")
abort(404, message='No watch exists with the UUID of {}'.format(uuid))
if not len(watch.history):
abort(404, message=f"Watch found but no history exists for the UUID {uuid}")
abort(404, message='Watch found but no history exists for the UUID {}'.format(uuid))
if timestamp == 'latest':
timestamp = list(watch.history.keys())[-1]
if request.args.get('html'):
content = watch.get_fetched_html(timestamp)
if content:
response = make_response(content, 200)
response.mimetype = "text/html"
else:
response = make_response("No content found", 404)
response.mimetype = "text/plain"
else:
content = watch.get_history_snapshot(timestamp)
response = make_response(content, 200)
response.mimetype = "text/plain"
content = watch.get_history_snapshot(timestamp)
response = make_response(content, 200)
response.mimetype = "text/plain"
return response

View File

@@ -187,10 +187,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
u = browsersteps_sessions[browsersteps_session_id]['browserstepper'].page.url
if is_last_step and u:
(screenshot, xpath_data) = browsersteps_sessions[browsersteps_session_id]['browserstepper'].request_visualselector_data()
watch = datastore.data['watching'].get(uuid)
if watch:
watch.save_screenshot(screenshot=screenshot)
watch.save_xpath_data(data=xpath_data)
datastore.save_screenshot(watch_uuid=uuid, screenshot=screenshot)
datastore.save_xpath_data(watch_uuid=uuid, data=xpath_data)
# if not this_session.page:
# cleanup_playwright_session()

View File

@@ -255,8 +255,8 @@ class browsersteps_live_ui(steppable_browser_interface):
def get_current_state(self):
"""Return the screenshot and interactive elements mapping, generally always called after action_()"""
import importlib.resources
xpath_element_js = importlib.resources.read_text("changedetectionio.content_fetchers.res", "xpath_element_scraper.js")
from pkg_resources import resource_string
xpath_element_js = resource_string(__name__, "../../content_fetchers/res/xpath_element_scraper.js").decode('utf-8')
now = time.time()
self.page.wait_for_timeout(1 * 1000)
@@ -287,9 +287,11 @@ class browsersteps_live_ui(steppable_browser_interface):
:param current_include_filters:
:return:
"""
import importlib.resources
self.page.evaluate("var include_filters=''")
xpath_element_js = importlib.resources.read_text("changedetectionio.content_fetchers.res", "xpath_element_scraper.js")
from pkg_resources import resource_string
# The code that scrapes elements and makes a list of elements/size/position to click on in the VisualSelector
xpath_element_js = resource_string(__name__, "../../content_fetchers/res/xpath_element_scraper.js").decode('utf-8')
from changedetectionio.content_fetchers import visualselector_xpath_selectors
xpath_element_js = xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors)
xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")

View File

@@ -63,7 +63,7 @@ xpath://body/div/span[contains(@class, 'example-class')]",
<ul>
<li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required, <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li>
{% if jq_support %}
<li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>. Prefix <code>jqraw:</code> outputs the results as text instead of a JSON list.</li>
<li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>.</li>
{% else %}
<li>jq support not installed</li>
{% endif %}

View File

@@ -64,9 +64,10 @@ class Fetcher():
render_extract_delay = 0
def __init__(self):
import importlib.resources
self.xpath_element_js = importlib.resources.read_text("changedetectionio.content_fetchers.res", 'xpath_element_scraper.js')
self.instock_data_js = importlib.resources.read_text("changedetectionio.content_fetchers.res", 'stock-not-in-stock.js')
from pkg_resources import resource_string
# The code that scrapes elements and makes a list of elements/size/position to click on in the VisualSelector
self.xpath_element_js = resource_string(__name__, "res/xpath_element_scraper.js").decode('utf-8')
self.instock_data_js = resource_string(__name__, "res/stock-not-in-stock.js").decode('utf-8')
@abstractmethod
def get_error(self):

View File

@@ -1 +0,0 @@
# resources for browser injection/scraping

View File

@@ -30,21 +30,14 @@ function isItemInStock() {
'dieser artikel ist bald wieder verfügbar',
'dostępne wkrótce',
'en rupture de stock',
'isn\'t in stock right now',
'isnt in stock right now',
'isnt in stock right now',
'ist derzeit nicht auf lager',
'item is no longer available',
'let me know when it\'s available',
'mail me when available',
'message if back in stock',
'nachricht bei',
'nicht auf lager',
'nicht lagernd',
'nicht lieferbar',
'nicht verfügbar',
'nicht vorrätig',
'nicht zur verfügung',
'nie znaleziono produktów',
'niet beschikbaar',
'niet leverbaar',
'niet op voorraad',
@@ -55,7 +48,6 @@ function isItemInStock() {
'not currently available',
'not in stock',
'notify me when available',
'notify me',
'notify when available',
'não estamos a aceitar encomendas',
'out of stock',
@@ -70,16 +62,12 @@ function isItemInStock() {
'this item is currently unavailable',
'tickets unavailable',
'tijdelijk uitverkocht',
'unavailable nearby',
'unavailable tickets',
'vergriffen',
'vorbestellen',
'vorbestellung ist bald möglich',
'we couldn\'t find any products that match',
'we do not currently have an estimate of when this product will be back in stock.',
'we don\'t know when or if this item will be back in stock.',
'we were not able to find a match',
'when this arrives in stock',
'zur zeit nicht an lager',
'品切れ',
'已售',

View File

@@ -679,10 +679,7 @@ def changedetection_app(config=None, datastore_o=None):
if request.method == 'POST' and form.validate():
extra_update_obj = {
'consecutive_filter_failures': 0,
'last_error' : False
}
extra_update_obj = {}
if request.args.get('unpause_on_save'):
extra_update_obj['paused'] = False
@@ -721,7 +718,7 @@ def changedetection_app(config=None, datastore_o=None):
datastore.data['watching'][uuid].update(extra_update_obj)
if request.args.get('unpause_on_save'):
flash("Updated watch - unpaused!")
flash("Updated watch - unpaused!.")
else:
flash("Updated watch.")

View File

@@ -3,6 +3,8 @@ from bs4 import BeautifulSoup
from inscriptis import get_text
from jsonpath_ng.ext import parse
from typing import List
from inscriptis.css_profiles import CSS_PROFILES, HtmlElement
from inscriptis.html_properties import Display
from inscriptis.model.config import ParserConfig
from xml.sax.saxutils import escape as xml_escape
import json
@@ -194,12 +196,12 @@ def extract_element(find='title', html_content=''):
#
def _parse_json(json_data, json_filter):
if json_filter.startswith("json:"):
if 'json:' in json_filter:
jsonpath_expression = parse(json_filter.replace('json:', ''))
match = jsonpath_expression.find(json_data)
return _get_stripped_text_from_json_match(match)
if json_filter.startswith("jq:") or json_filter.startswith("jqraw:"):
if 'jq:' in json_filter:
try:
import jq
@@ -207,15 +209,10 @@ def _parse_json(json_data, json_filter):
# `jq` requires full compilation in windows and so isn't generally available
raise Exception("jq not support not found")
if json_filter.startswith("jq:"):
jq_expression = jq.compile(json_filter.removeprefix("jq:"))
match = jq_expression.input(json_data).all()
return _get_stripped_text_from_json_match(match)
jq_expression = jq.compile(json_filter.replace('jq:', ''))
match = jq_expression.input(json_data).all()
if json_filter.startswith("jqraw:"):
jq_expression = jq.compile(json_filter.removeprefix("jqraw:"))
match = jq_expression.input(json_data).all()
return '\n'.join(str(item) for item in match)
return _get_stripped_text_from_json_match(match)
def _get_stripped_text_from_json_match(match):
s = []

View File

@@ -5,7 +5,6 @@ from changedetectionio.notification import (
default_notification_title,
)
# Equal to or greater than this number of FilterNotFoundInResponse exceptions will trigger a filter-not-found notification
_FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT = 6
DEFAULT_SETTINGS_HEADERS_USERAGENT='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'

View File

@@ -328,10 +328,15 @@ class model(dict):
def save_history_text(self, contents, timestamp, snapshot_id):
import brotli
logger.trace(f"{self.get('uuid')} - Updating history.txt with timestamp {timestamp}")
self.ensure_data_dir_exists()
# Small hack so that we sleep just enough to allow 1 second between history snapshots
# this is because history.txt indexes/keys snapshots by epoch seconds and we dont want dupe keys
if self.__newest_history_key and int(timestamp) == int(self.__newest_history_key):
logger.warning(f"Timestamp {timestamp} already exists, waiting 1 seconds so we have a unique key in history.txt")
timestamp = str(int(timestamp) + 1)
time.sleep(1)
threshold = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024))
skip_brotli = strtobool(os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False'))
@@ -523,42 +528,8 @@ class model(dict):
# None is set
return False
def save_error_text(self, contents):
self.ensure_data_dir_exists()
target_path = os.path.join(self.watch_data_dir, "last-error.txt")
with open(target_path, 'w') as f:
f.write(contents)
def save_xpath_data(self, data, as_error=False):
import json
if as_error:
target_path = os.path.join(self.watch_data_dir, "elements-error.json")
else:
target_path = os.path.join(self.watch_data_dir, "elements.json")
self.ensure_data_dir_exists()
with open(target_path, 'w') as f:
f.write(json.dumps(data))
f.close()
# Save as PNG, PNG is larger but better for doing visual diff in the future
def save_screenshot(self, screenshot: bytes, as_error=False):
if as_error:
target_path = os.path.join(self.watch_data_dir, "last-error-screenshot.png")
else:
target_path = os.path.join(self.watch_data_dir, "last-screenshot.png")
self.ensure_data_dir_exists()
with open(target_path, 'wb') as f:
f.write(screenshot)
f.close()
def get_last_fetched_text_before_filters(self):
def get_last_fetched_before_filters(self):
import brotli
filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
@@ -573,56 +544,12 @@ class model(dict):
with open(filepath, 'rb') as f:
return(brotli.decompress(f.read()).decode('utf-8'))
def save_last_text_fetched_before_filters(self, contents):
def save_last_fetched_before_filters(self, contents):
import brotli
filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
with open(filepath, 'wb') as f:
f.write(brotli.compress(contents, mode=brotli.MODE_TEXT))
def save_last_fetched_html(self, timestamp, contents):
import brotli
self.ensure_data_dir_exists()
snapshot_fname = f"{timestamp}.html.br"
filepath = os.path.join(self.watch_data_dir, snapshot_fname)
with open(filepath, 'wb') as f:
contents = contents.encode('utf-8') if isinstance(contents, str) else contents
try:
f.write(brotli.compress(contents))
except Exception as e:
logger.warning(f"{self.get('uuid')} - Unable to compress snapshot, saving as raw data to {filepath}")
logger.warning(e)
f.write(contents)
self._prune_last_fetched_html_snapshots()
def get_fetched_html(self, timestamp):
import brotli
snapshot_fname = f"{timestamp}.html.br"
filepath = os.path.join(self.watch_data_dir, snapshot_fname)
if os.path.isfile(filepath):
with open(filepath, 'rb') as f:
return (brotli.decompress(f.read()).decode('utf-8'))
return False
def _prune_last_fetched_html_snapshots(self):
dates = list(self.history.keys())
dates.reverse()
for index, timestamp in enumerate(dates):
snapshot_fname = f"{timestamp}.html.br"
filepath = os.path.join(self.watch_data_dir, snapshot_fname)
# Keep only the first 2
if index > 1 and os.path.isfile(filepath):
os.remove(filepath)
@property
def get_browsersteps_available_screenshots(self):
"For knowing which screenshots are available to show the user in BrowserSteps UI"

View File

@@ -1,6 +1,5 @@
from abc import abstractmethod
from changedetectionio.strtobool import strtobool
from changedetectionio.model import Watch
from copy import deepcopy
from loguru import logger
import hashlib
@@ -139,7 +138,7 @@ class difference_detection_processor():
# After init, call run_changedetection() which will do the actual change-detection
@abstractmethod
def run_changedetection(self, watch: Watch, skip_when_checksum_same=True):
def run_changedetection(self, uuid, skip_when_checksum_same=True):
update_obj = {'last_notification_error': False, 'last_error': False}
some_data = 'xxxxx'
update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()

View File

@@ -1,5 +1,6 @@
from . import difference_detection_processor
from copy import deepcopy
from loguru import logger
import hashlib
import urllib3
@@ -19,7 +20,10 @@ class perform_site_check(difference_detection_processor):
screenshot = None
xpath_data = None
def run_changedetection(self, watch, skip_when_checksum_same=True):
def run_changedetection(self, uuid, skip_when_checksum_same=True):
# DeepCopy so we can be sure we don't accidently change anything by reference
watch = deepcopy(self.datastore.data['watching'].get(uuid))
if not watch:
raise Exception("Watch no longer exists.")
@@ -40,13 +44,13 @@ class perform_site_check(difference_detection_processor):
fetched_md5 = hashlib.md5(self.fetcher.instock_data.encode('utf-8')).hexdigest()
# 'Possibly in stock' comes from stock-not-in-stock.js when no string found above the fold.
update_obj["in_stock"] = True if self.fetcher.instock_data == 'Possibly in stock' else False
logger.debug(f"Watch UUID {watch.get('uuid')} restock check returned '{self.fetcher.instock_data}' from JS scraper.")
logger.debug(f"Watch UUID {uuid} restock check returned '{self.fetcher.instock_data}' from JS scraper.")
else:
raise UnableToExtractRestockData(status_code=self.fetcher.status_code)
# The main thing that all this at the moment comes down to :)
changed_detected = False
logger.debug(f"Watch UUID {watch.get('uuid')} restock check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")
logger.debug(f"Watch UUID {uuid} restock check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")
if watch.get('previous_md5') and watch.get('previous_md5') != fetched_md5:
# Yes if we only care about it going to instock, AND we are in stock

View File

@@ -10,17 +10,18 @@ from . import difference_detection_processor
from ..html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text
from changedetectionio import html_tools, content_fetchers
from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
import changedetectionio.content_fetchers
from copy import deepcopy
from loguru import logger
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
name = 'Webpage Text/HTML, JSON and PDF changes'
description = 'Detects all text changes where possible'
json_filter_prefixes = ['json:', 'jq:', 'jqraw:']
json_filter_prefixes = ['json:', 'jq:']
class FilterNotFoundInResponse(ValueError):
def __init__(self, msg, screenshot=None):
self.screenshot = screenshot
def __init__(self, msg):
ValueError.__init__(self, msg)
@@ -33,12 +34,14 @@ class PDFToHTMLToolNotFound(ValueError):
# (set_proxy_from_list)
class perform_site_check(difference_detection_processor):
def run_changedetection(self, watch, skip_when_checksum_same=True):
def run_changedetection(self, uuid, skip_when_checksum_same=True):
changed_detected = False
html_content = ""
screenshot = False # as bytes
stripped_text_from_html = ""
# DeepCopy so we can be sure we don't accidently change anything by reference
watch = deepcopy(self.datastore.data['watching'].get(uuid))
if not watch:
raise Exception("Watch no longer exists.")
@@ -113,12 +116,12 @@ class perform_site_check(difference_detection_processor):
# Better would be if Watch.model could access the global data also
# and then use getattr https://docs.python.org/3/reference/datamodel.html#object.__getitem__
# https://realpython.com/inherit-python-dict/ instead of doing it procedurely
include_filters_from_tags = self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='include_filters')
include_filters_from_tags = self.datastore.get_tag_overrides_for_watch(uuid=uuid, attr='include_filters')
# 1845 - remove duplicated filters in both group and watch include filter
include_filters_rule = list(dict.fromkeys(watch.get('include_filters', []) + include_filters_from_tags))
subtractive_selectors = [*self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='subtractive_selectors'),
subtractive_selectors = [*self.datastore.get_tag_overrides_for_watch(uuid=uuid, attr='subtractive_selectors'),
*watch.get("subtractive_selectors", []),
*self.datastore.data["settings"]["application"].get("global_subtractive_selectors", [])
]
@@ -185,7 +188,7 @@ class perform_site_check(difference_detection_processor):
append_pretty_line_formatting=not watch.is_source_type_url)
if not html_content.strip():
raise FilterNotFoundInResponse(msg=include_filters_rule, screenshot=self.fetcher.screenshot)
raise FilterNotFoundInResponse(include_filters_rule)
if has_subtractive_selectors:
html_content = html_tools.element_removal(subtractive_selectors, html_content)
@@ -219,7 +222,7 @@ class perform_site_check(difference_detection_processor):
from .. import diff
# needs to not include (added) etc or it may get used twice
# Replace the processed text with the preferred result
rendered_diff = diff.render_diff(previous_version_file_contents=watch.get_last_fetched_text_before_filters(),
rendered_diff = diff.render_diff(previous_version_file_contents=watch.get_last_fetched_before_filters(),
newest_version_file_contents=stripped_text_from_html,
include_equal=False, # not the same lines
include_added=watch.get('filter_text_added', True),
@@ -228,7 +231,7 @@ class perform_site_check(difference_detection_processor):
line_feed_sep="\n",
include_change_type_prefix=False)
watch.save_last_text_fetched_before_filters(text_content_before_ignored_filter)
watch.save_last_fetched_before_filters(text_content_before_ignored_filter)
if not rendered_diff and stripped_text_from_html:
# We had some content, but no differences were found
@@ -341,17 +344,17 @@ class perform_site_check(difference_detection_processor):
if not watch['title'] or not len(watch['title']):
update_obj['title'] = html_tools.extract_element(find='title', html_content=self.fetcher.content)
logger.debug(f"Watch UUID {watch.get('uuid')} content check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")
logger.debug(f"Watch UUID {uuid} content check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")
if changed_detected:
if watch.get('check_unique_lines', False):
has_unique_lines = watch.lines_contain_something_unique_compared_to_history(lines=stripped_text_from_html.splitlines())
# One or more lines? unsure?
if not has_unique_lines:
logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} didnt have anything new setting change_detected=False")
logger.debug(f"check_unique_lines: UUID {uuid} didnt have anything new setting change_detected=False")
changed_detected = False
else:
logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} had unique content")
logger.debug(f"check_unique_lines: UUID {uuid} had unique content")
# Always record the new checksum
update_obj["previous_md5"] = fetched_md5

View File

@@ -1,5 +1,14 @@
$(document).ready(function () {
// duplicate
var csrftoken = $('input[name=csrf_token]').val();
$.ajaxSetup({
beforeSend: function (xhr, settings) {
if (!/^(GET|HEAD|OPTIONS|TRACE)$/i.test(settings.type) && !this.crossDomain) {
xhr.setRequestHeader("X-CSRFToken", csrftoken)
}
}
})
var browsersteps_session_id;
var browser_interface_seconds_remaining = 0;
var apply_buttons_disabled = false;

View File

@@ -1,10 +0,0 @@
$(document).ready(function () {
$.ajaxSetup({
beforeSend: function (xhr, settings) {
if (!/^(GET|HEAD|OPTIONS|TRACE)$/i.test(settings.type) && !this.crossDomain) {
xhr.setRequestHeader("X-CSRFToken", csrftoken)
}
}
})
});

View File

@@ -1,4 +1,13 @@
$(document).ready(function () {
var csrftoken = $('input[name=csrf_token]').val();
$.ajaxSetup({
beforeSend: function (xhr, settings) {
if (!/^(GET|HEAD|OPTIONS|TRACE)$/i.test(settings.type) && !this.crossDomain) {
xhr.setRequestHeader("X-CSRFToken", csrftoken)
}
}
})
$('.needs-localtime').each(function () {
for (var option of this.options) {
var dateObject = new Date(option.value * 1000);
@@ -39,12 +48,6 @@ $(document).ready(function () {
$("#highlightSnippet").remove();
}
// Listen for Escape key press
window.addEventListener('keydown', function (e) {
if (e.key === 'Escape') {
clean();
}
}, false);
function dragTextHandler(event) {
console.log('mouseupped');

View File

@@ -13,6 +13,16 @@ $(document).ready(function() {
$('#send-test-notification').click(function (e) {
e.preventDefault();
// this can be global
var csrftoken = $('input[name=csrf_token]').val();
$.ajaxSetup({
beforeSend: function(xhr, settings) {
if (!/^(GET|HEAD|OPTIONS|TRACE)$/i.test(settings.type) && !this.crossDomain) {
xhr.setRequestHeader("X-CSRFToken", csrftoken)
}
}
})
data = {
notification_body: $('#notification_body').val(),
notification_format: $('#notification_format').val(),

View File

@@ -671,25 +671,14 @@ footer {
and also iPads specifically.
*/
.watch-table {
/* make headings work on mobile */
thead {
display: block;
tr {
th {
display: inline-block;
}
}
.empty-cell {
display: none;
}
}
/* Force table to not be like tables anymore */
tbody {
td,
tr {
display: block;
}
thead,
tbody,
th,
td,
tr {
display: block;
}
.last-checked {
@@ -713,6 +702,13 @@ footer {
display: inline-block;
}
/* Hide table headers (but not display: none;, for accessibility) */
thead tr {
position: absolute;
top: -9999px;
left: -9999px;
}
.pure-table td,
.pure-table th {
border: none;
@@ -757,7 +753,6 @@ footer {
thead {
background-color: var(--color-background-table-thead);
color: var(--color-text);
border-bottom: 1px solid var(--color-background-table-thead);
}
td,

View File

@@ -863,17 +863,14 @@ footer {
and also iPads specifically.
*/
.watch-table {
/* make headings work on mobile */
/* Force table to not be like tables anymore */
/* Force table to not be like tables anymore */ }
.watch-table thead {
display: block; }
.watch-table thead tr th {
display: inline-block; }
.watch-table thead .empty-cell {
display: none; }
.watch-table tbody td,
.watch-table tbody tr {
/* Force table to not be like tables anymore */
/* Hide table headers (but not display: none;, for accessibility) */ }
.watch-table thead,
.watch-table tbody,
.watch-table th,
.watch-table td,
.watch-table tr {
display: block; }
.watch-table .last-checked > span {
vertical-align: middle; }
@@ -885,6 +882,10 @@ footer {
content: "Last Changed "; }
.watch-table td.inline {
display: inline-block; }
.watch-table thead tr {
position: absolute;
top: -9999px;
left: -9999px; }
.watch-table .pure-table td,
.watch-table .pure-table th {
border: none; }
@@ -911,8 +912,7 @@ footer {
border-color: var(--color-border-table-cell); }
.pure-table thead {
background-color: var(--color-background-table-thead);
color: var(--color-text);
border-bottom: 1px solid var(--color-background-table-thead); }
color: var(--color-text); }
.pure-table td,
.pure-table th {
border-left-color: var(--color-border-table-cell); }

View File

@@ -163,6 +163,7 @@ class ChangeDetectionStore:
del (update_obj[dict_key])
self.__data['watching'][uuid].update(update_obj)
self.needs_write = True
@property
@@ -375,6 +376,46 @@ class ChangeDetectionStore:
return False
# Save as PNG, PNG is larger but better for doing visual diff in the future
def save_screenshot(self, watch_uuid, screenshot: bytes, as_error=False):
if not self.data['watching'].get(watch_uuid):
return
if as_error:
target_path = os.path.join(self.datastore_path, watch_uuid, "last-error-screenshot.png")
else:
target_path = os.path.join(self.datastore_path, watch_uuid, "last-screenshot.png")
self.data['watching'][watch_uuid].ensure_data_dir_exists()
with open(target_path, 'wb') as f:
f.write(screenshot)
f.close()
def save_error_text(self, watch_uuid, contents):
if not self.data['watching'].get(watch_uuid):
return
self.data['watching'][watch_uuid].ensure_data_dir_exists()
target_path = os.path.join(self.datastore_path, watch_uuid, "last-error.txt")
with open(target_path, 'w') as f:
f.write(contents)
def save_xpath_data(self, watch_uuid, data, as_error=False):
if not self.data['watching'].get(watch_uuid):
return
if as_error:
target_path = os.path.join(self.datastore_path, watch_uuid, "elements-error.json")
else:
target_path = os.path.join(self.datastore_path, watch_uuid, "elements.json")
self.data['watching'][watch_uuid].ensure_data_dir_exists()
with open(target_path, 'w') as f:
f.write(json.dumps(data))
f.close()
def sync_to_json(self):
logger.info("Saving JSON..")
try:

View File

@@ -26,11 +26,7 @@
<meta name="msapplication-TileColor" content="#da532c">
<meta name="msapplication-config" content="favicons/browserconfig.xml">
<meta name="theme-color" content="#ffffff">
<script>
const csrftoken="{{ csrf_token() }}";
</script>
<script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
<script src="{{url_for('static_content', group='js', filename='csrf.js')}}" defer></script>
</head>
<body>

View File

@@ -292,7 +292,7 @@ xpath://body/div/span[contains(@class, 'example-class')]",
<ul>
<li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required, <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li>
{% if jq_support %}
<li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>. Prefix <code>jqraw:</code> outputs the results as text instead of a JSON list.</li>
<li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>.</li>
{% else %}
<li>jq support not installed</li>
{% endif %}

View File

@@ -68,11 +68,11 @@
{% set link_order = "desc" if sort_order == 'asc' else "asc" %}
{% set arrow_span = "" %}
<th><input style="vertical-align: middle" type="checkbox" id="check-all" > <a class="{{ 'active '+link_order if sort_attribute == 'date_created' else 'inactive' }}" href="{{url_for('index', sort='date_created', order=link_order, tag=active_tag_uuid)}}"># <span class='arrow {{link_order}}'></span></a></th>
<th class="empty-cell"></th>
<th></th>
<th><a class="{{ 'active '+link_order if sort_attribute == 'label' else 'inactive' }}" href="{{url_for('index', sort='label', order=link_order, tag=active_tag_uuid)}}">Website <span class='arrow {{link_order}}'></span></a></th>
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_checked' else 'inactive' }}" href="{{url_for('index', sort='last_checked', order=link_order, tag=active_tag_uuid)}}">Last Checked <span class='arrow {{link_order}}'></span></a></th>
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_changed' else 'inactive' }}" href="{{url_for('index', sort='last_changed', order=link_order, tag=active_tag_uuid)}}">Last Changed <span class='arrow {{link_order}}'></span></a></th>
<th class="empty-cell"></th>
<th></th>
</tr>
</thead>
<tbody>

View File

@@ -149,15 +149,6 @@ def test_api_simple(client, live_server):
headers={'x-api-key': api_key},
)
assert b'which has this one new line' in res.data
assert b'<div id' not in res.data
# Fetch the HTML of the latest one
res = client.get(
url_for("watchsinglehistory", uuid=watch_uuid, timestamp='latest')+"?html=1",
headers={'x-api-key': api_key},
)
assert b'which has this one new line' in res.data
assert b'<div id' in res.data
# Fetch the whole watch
res = client.get(

View File

@@ -2,12 +2,13 @@
import time
from flask import url_for
from .util import live_server_setup, wait_for_all_checks
from . util import live_server_setup
def test_basic_auth(client, live_server):
live_server_setup(live_server)
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page
test_url = url_for('test_basicauth_method', _external=True).replace("//","//myuser:mypass@")
@@ -18,8 +19,8 @@ def test_basic_auth(client, live_server):
follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)
time.sleep(1)
# Check form validation
res = client.post(
url_for("edit_page", uuid="first"),
@@ -28,7 +29,7 @@ def test_basic_auth(client, live_server):
)
assert b"Updated watch." in res.data
wait_for_all_checks(client)
time.sleep(1)
res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True

View File

@@ -100,7 +100,7 @@ def test_check_ldjson_price_autodetect(client, live_server):
# Accept it
uuid = extract_UUID_from_client(client)
time.sleep(1)
client.get(url_for('price_data_follower.accept', uuid=uuid, follow_redirects=True))
wait_for_all_checks(client)

View File

@@ -62,6 +62,9 @@ def test_check_basic_change_detection_functionality(client, live_server):
# Make a change
set_modified_response()
res = urlopen(url_for('test_endpoint', _external=True))
assert b'which has this one new line' in res.read()
# Force recheck
res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
assert b'1 watches queued for rechecking.' in res.data

View File

@@ -3,7 +3,7 @@
import time
from flask import url_for
from .util import live_server_setup, wait_for_all_checks
from .util import live_server_setup
import pytest
@@ -27,6 +27,9 @@ def set_html_response():
def test_check_encoding_detection(client, live_server):
set_html_response()
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page
test_url = url_for('test_endpoint', content_type="text/html", _external=True)
client.post(
@@ -36,7 +39,7 @@ def test_check_encoding_detection(client, live_server):
)
# Give the thread time to pick it up
wait_for_all_checks(client)
time.sleep(2)
res = client.get(
url_for("preview_page", uuid="first"),
@@ -53,6 +56,9 @@ def test_check_encoding_detection(client, live_server):
def test_check_encoding_detection_missing_content_type_header(client, live_server):
set_html_response()
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
client.post(
@@ -61,7 +67,8 @@ def test_check_encoding_detection_missing_content_type_header(client, live_serve
follow_redirects=True
)
wait_for_all_checks(client)
# Give the thread time to pick it up
time.sleep(2)
res = client.get(
url_for("preview_page", uuid="first"),

View File

@@ -29,7 +29,6 @@ def test_check_extract_text_from_diff(client, live_server):
# Load in 5 different numbers/changes
last_date=""
for n in range(5):
time.sleep(1)
# Give the thread time to pick it up
print("Bumping snapshot and checking.. ", n)
last_date = str(time.time())

View File

@@ -21,11 +21,10 @@ def set_response_with_filter():
f.write(test_return_data)
return None
def run_filter_test(client, live_server, content_filter):
# Response WITHOUT the filter ID element
set_original_response()
def run_filter_test(client, content_filter):
# Give the endpoint time to spin up
time.sleep(1)
# cleanup for the next
client.get(
url_for("form_delete", uuid="all"),
@@ -80,7 +79,6 @@ def run_filter_test(client, live_server, content_filter):
"include_filters": content_filter,
"fetch_backend": "html_requests"})
# A POST here will also reset the filter failure counter (filter_failure_notification_threshold_attempts)
res = client.post(
url_for("edit_page", uuid="first"),
data=notification_form_data,
@@ -93,21 +91,20 @@ def run_filter_test(client, live_server, content_filter):
# Now the notification should not exist, because we didnt reach the threshold
assert not os.path.isfile("test-datastore/notification.txt")
# recheck it up to just before the threshold, including the fact that in the previous POST it would have rechecked (and incremented)
# -2 because we would have checked twice above (on adding and on edit)
for i in range(0, App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT-2):
client.get(url_for("form_watch_checknow"), follow_redirects=True)
res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
time.sleep(2) # delay for apprise to fire
assert not os.path.isfile("test-datastore/notification.txt"), f"test-datastore/notification.txt should not exist - Attempt {i} when threshold is {App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT}"
assert not os.path.isfile("test-datastore/notification.txt"), f"test-datastore/notification.txt should not exist - Attempt {i}"
# We should see something in the frontend
res = client.get(url_for("index"))
assert b'Warning, no filters were found' in res.data
# One more check should trigger the _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT threshold
# One more check should trigger it (see -2 above)
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
time.sleep(2) # delay for apprise to fire
# Now it should exist and contain our "filter not found" alert
assert os.path.isfile("test-datastore/notification.txt")
@@ -152,9 +149,13 @@ def test_setup(live_server):
live_server_setup(live_server)
def test_check_include_filters_failure_notification(client, live_server):
run_filter_test(client, live_server,'#nope-doesnt-exist')
set_original_response()
wait_for_all_checks(client)
run_filter_test(client, '#nope-doesnt-exist')
def test_check_xpath_filter_failure_notification(client, live_server):
run_filter_test(client, live_server, '//*[@id="nope-doesnt-exist"]')
set_original_response()
time.sleep(1)
run_filter_test(client, '//*[@id="nope-doesnt-exist"]')
# Test that notification is never sent

View File

@@ -5,13 +5,15 @@ import os
import json
import logging
from flask import url_for
from .util import live_server_setup, wait_for_all_checks
from .util import live_server_setup
from urllib.parse import urlparse, parse_qs
def test_consistent_history(client, live_server):
live_server_setup(live_server)
r = range(1, 30)
# Give the endpoint time to spin up
time.sleep(1)
r = range(1, 50)
for one in r:
test_url = url_for('test_endpoint', content_type="text/html", content=str(one), _external=True)
@@ -23,8 +25,15 @@ def test_consistent_history(client, live_server):
assert b"1 Imported" in res.data
wait_for_all_checks(client)
time.sleep(3)
while True:
res = client.get(url_for("index"))
logging.debug("Waiting for 'Checking now' to go away..")
if b'Checking now' not in res.data:
break
time.sleep(0.5)
time.sleep(3)
# Essentially just triggers the DB write/update
res = client.post(
url_for("settings_page"),
@@ -35,9 +44,8 @@ def test_consistent_history(client, live_server):
)
assert b"Settings updated." in res.data
time.sleep(2)
# Give it time to write it out
time.sleep(3)
json_db_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, 'url-watches.json')
json_obj = None
@@ -50,7 +58,7 @@ def test_consistent_history(client, live_server):
# each one should have a history.txt containing just one line
for w in json_obj['watching'].keys():
history_txt_index_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, w, 'history.txt')
assert os.path.isfile(history_txt_index_file), f"History.txt should exist where I expect it at {history_txt_index_file}"
assert os.path.isfile(history_txt_index_file), "History.txt should exist where I expect it - {}".format(history_txt_index_file)
# Same like in model.Watch
with open(history_txt_index_file, "r") as f:
@@ -62,15 +70,15 @@ def test_consistent_history(client, live_server):
w))
# Find the snapshot one
for fname in files_in_watch_dir:
if fname != 'history.txt' and 'html' not in fname:
if fname != 'history.txt':
# contents should match what we requested as content returned from the test url
with open(os.path.join(live_server.app.config['DATASTORE'].datastore_path, w, fname), 'r') as snapshot_f:
contents = snapshot_f.read()
watch_url = json_obj['watching'][w]['url']
u = urlparse(watch_url)
q = parse_qs(u[4])
assert q['content'][0] == contents.strip(), f"Snapshot file {fname} should contain {q['content'][0]}"
assert q['content'][0] == contents.strip(), "Snapshot file {} should contain {}".format(fname, q['content'][0])
assert len(files_in_watch_dir) == 3, "Should be just three files in the dir, html.br snapshot, history.txt and the extracted text snapshot"
assert len(files_in_watch_dir) == 2, "Should be just two files in the dir, history.txt and the snapshot"

View File

@@ -45,6 +45,7 @@ def test_highlight_ignore(client, live_server):
)
res = client.get(url_for("edit_page", uuid=uuid))
# should be a regex now
assert b'/oh\ yeah\ \d+/' in res.data
@@ -54,7 +55,3 @@ def test_highlight_ignore(client, live_server):
# And it should register in the preview page
res = client.get(url_for("preview_page", uuid=uuid))
assert b'<div class="ignored">oh yeah 456' in res.data
# Should be in base.html
assert b'csrftoken' in res.data

View File

@@ -41,26 +41,19 @@ and it can also be repeated
from .. import html_tools
# See that we can find the second <script> one, which is not broken, and matches our filter
text = html_tools.extract_json_as_string(content, "json:$.offers.priceCurrency")
assert text == '"AUD"'
text = html_tools.extract_json_as_string('{"id":5}', "json:$.id")
assert text == "5"
text = html_tools.extract_json_as_string(content, "json:$.offers.price")
assert text == "23.5"
# also check for jq
if jq_support:
text = html_tools.extract_json_as_string(content, "jq:.offers.priceCurrency")
assert text == '"AUD"'
text = html_tools.extract_json_as_string(content, "jq:.offers.price")
assert text == "23.5"
text = html_tools.extract_json_as_string('{"id":5}', "jq:.id")
assert text == "5"
text = html_tools.extract_json_as_string(content, "jqraw:.offers.priceCurrency")
assert text == "AUD"
text = html_tools.extract_json_as_string('{"id":5}', "jqraw:.id")
assert text == "5"
text = html_tools.extract_json_as_string('{"id":5}', "json:$.id")
assert text == "5"
# When nothing at all is found, it should throw JSONNOTFound
# Which is caught and shown to the user in the watch-overview table
@@ -71,9 +64,6 @@ and it can also be repeated
with pytest.raises(html_tools.JSONNotFound) as e_info:
html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "jq:.id")
with pytest.raises(html_tools.JSONNotFound) as e_info:
html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "jqraw:.id")
def test_unittest_inline_extract_body():
content = """
@@ -301,10 +291,6 @@ def test_check_jq_filter(client, live_server):
if jq_support:
check_json_filter('jq:.boss.name', client, live_server)
def test_check_jqraw_filter(client, live_server):
if jq_support:
check_json_filter('jqraw:.boss.name', client, live_server)
def check_json_filter_bool_val(json_filter, client, live_server):
set_original_response()
@@ -359,10 +345,6 @@ def test_check_jq_filter_bool_val(client, live_server):
if jq_support:
check_json_filter_bool_val("jq:.available", client, live_server)
def test_check_jqraw_filter_bool_val(client, live_server):
if jq_support:
check_json_filter_bool_val("jq:.available", client, live_server)
# Re #265 - Extended JSON selector test
# Stuff to consider here
# - Selector should be allowed to return empty when it doesnt match (people might wait for some condition)
@@ -509,8 +491,4 @@ def test_check_jsonpath_ext_filter(client, live_server):
def test_check_jq_ext_filter(client, live_server):
if jq_support:
check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server)
def test_check_jqraw_ext_filter(client, live_server):
if jq_support:
check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server)
check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server)

View File

@@ -69,7 +69,6 @@ def test_rss_and_token(client, live_server):
wait_for_all_checks(client)
set_modified_response()
time.sleep(1)
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
@@ -88,7 +87,7 @@ def test_rss_and_token(client, live_server):
assert b"Access denied, bad token" not in res.data
assert b"Random content" in res.data
client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
def test_basic_cdata_rss_markup(client, live_server):
#live_server_setup(live_server)

View File

@@ -1,12 +1,11 @@
from . import content_fetchers
from .processors.restock_diff import UnableToExtractRestockData
from .processors.text_json_diff import FilterNotFoundInResponse
from changedetectionio import html_tools
from copy import deepcopy
import os
import queue
import threading
import queue
import time
from . import content_fetchers
from changedetectionio import html_tools
from .processors.text_json_diff import FilterNotFoundInResponse
from .processors.restock_diff import UnableToExtractRestockData
# A single update worker
#
@@ -246,19 +245,14 @@ class update_worker(threading.Thread):
contents = b''
process_changedetection_results = True
update_obj = {}
# Clear last errors (move to preflight func?)
self.datastore.data['watching'][uuid]['browser_steps_last_error_step'] = None
# DeepCopy so we can be sure we don't accidently change anything by reference
watch = deepcopy(self.datastore.data['watching'].get(uuid))
logger.info(f"Processing watch UUID {uuid} Priority {queued_item_data.priority} URL {watch['url']}")
logger.info(f"Processing watch UUID {uuid} "
f"Priority {queued_item_data.priority} "
f"URL {self.datastore.data['watching'][uuid]['url']}")
now = time.time()
try:
# Processor is what we are using for detecting the "Change"
processor = watch.get('processor', 'text_json_diff')
processor = self.datastore.data['watching'][uuid].get('processor', 'text_json_diff')
# if system...
# Abort processing when the content was the same as the last fetch
@@ -278,12 +272,14 @@ class update_worker(threading.Thread):
watch_uuid=uuid
)
# Clear last errors (move to preflight func?)
self.datastore.data['watching'][uuid]['browser_steps_last_error_step'] = None
update_handler.call_browser()
changed_detected, update_obj, contents = update_handler.run_changedetection(
watch=watch,
skip_when_checksum_same=skip_when_same_checksum,
)
changed_detected, update_obj, contents = update_handler.run_changedetection(uuid,
skip_when_checksum_same=skip_when_same_checksum,
)
# Re #342
# In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
@@ -313,8 +309,7 @@ class update_worker(threading.Thread):
})
if e.screenshot:
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot)
process_changedetection_results = False
except content_fetchers.exceptions.Non200ErrorCodeReceived as e:
@@ -330,11 +325,11 @@ class update_worker(threading.Thread):
err_text = "Error - Request returned a HTTP error code {}".format(str(e.status_code))
if e.screenshot:
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
if e.xpath_data:
watch.save_xpath_data(data=e.xpath_data, as_error=True)
self.datastore.save_xpath_data(watch_uuid=uuid, data=e.xpath_data, as_error=True)
if e.page_text:
watch.save_error_text(contents=e.page_text)
self.datastore.save_error_text(watch_uuid=uuid, contents=e.page_text)
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
process_changedetection_results = False
@@ -345,19 +340,17 @@ class update_worker(threading.Thread):
err_text = "Warning, no filters were found, no change detection ran - Did the page change layout? update your Visual Filter if necessary."
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
if e.screenshot:
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
# Only when enabled, send the notification
if watch.get('filter_failure_notification_send', False):
c = watch.get('consecutive_filter_failures', 5)
if self.datastore.data['watching'][uuid].get('filter_failure_notification_send', False):
c = self.datastore.data['watching'][uuid].get('consecutive_filter_failures', 5)
c += 1
# Send notification if we reached the threshold?
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
0)
logger.warning(f"Filter for {uuid} not found, consecutive_filter_failures: {c}")
logger.error(f"Filter for {uuid} not found, consecutive_filter_failures: {c}")
if threshold > 0 and c >= threshold:
if not watch.get('notification_muted'):
if not self.datastore.data['watching'][uuid].get('notification_muted'):
self.send_filter_failure_notification(uuid)
c = 0
@@ -369,6 +362,7 @@ class update_worker(threading.Thread):
# Yes fine, so nothing todo, don't continue to process.
process_changedetection_results = False
changed_detected = False
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': False})
except content_fetchers.exceptions.BrowserConnectError as e:
self.datastore.update_watch(uuid=uuid,
update_obj={'last_error': e.msg
@@ -407,15 +401,15 @@ class update_worker(threading.Thread):
}
)
if watch.get('filter_failure_notification_send', False):
c = watch.get('consecutive_filter_failures', 5)
if self.datastore.data['watching'][uuid].get('filter_failure_notification_send', False):
c = self.datastore.data['watching'][uuid].get('consecutive_filter_failures', 5)
c += 1
# Send notification if we reached the threshold?
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
0)
logger.error(f"Step for {uuid} not found, consecutive_filter_failures: {c}")
if threshold > 0 and c >= threshold:
if not watch.get('notification_muted'):
if not self.datastore.data['watching'][uuid].get('notification_muted'):
self.send_step_failure_notification(watch_uuid=uuid, step_n=e.step_n)
c = 0
@@ -437,7 +431,7 @@ class update_worker(threading.Thread):
except content_fetchers.exceptions.JSActionExceptions as e:
err_text = "Error running JS Actions - Page request - "+e.message
if e.screenshot:
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code})
process_changedetection_results = False
@@ -447,7 +441,7 @@ class update_worker(threading.Thread):
err_text = "{} - {}".format(err_text, e.message)
if e.screenshot:
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code,
@@ -471,6 +465,8 @@ class update_worker(threading.Thread):
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Exception: " + str(e)})
# Other serious error
process_changedetection_results = False
# import traceback
# print(traceback.format_exc())
else:
# Crash protection, the watch entry could have been removed by this point (during a slow chrome fetch etc)
@@ -478,7 +474,7 @@ class update_worker(threading.Thread):
continue
# Mark that we never had any failures
if not watch.get('ignore_status_codes'):
if not self.datastore.data['watching'][uuid].get('ignore_status_codes'):
update_obj['consecutive_filter_failures'] = 0
# Everything ran OK, clean off any previous error
@@ -486,48 +482,25 @@ class update_worker(threading.Thread):
self.cleanup_error_artifacts(uuid)
if not self.datastore.data['watching'].get(uuid):
continue
#
# Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc
if process_changedetection_results:
# Always save the screenshot if it's available
if update_handler.screenshot:
watch.save_screenshot(screenshot=update_handler.screenshot)
if update_handler.xpath_data:
watch.save_xpath_data(data=update_handler.xpath_data)
try:
watch = self.datastore.data['watching'].get(uuid)
self.datastore.update_watch(uuid=uuid, update_obj=update_obj)
# Also save the snapshot on the first time checked
if changed_detected or not watch.get('last_checked'):
timestamp = round(time.time())
# Small hack so that we sleep just enough to allow 1 second between history snapshots
# this is because history.txt indexes/keys snapshots by epoch seconds and we dont want dupe keys
if watch.newest_history_key and int(timestamp) == int(watch.newest_history_key):
logger.warning(
f"Timestamp {timestamp} already exists, waiting 1 seconds so we have a unique key in history.txt")
timestamp = str(int(timestamp) + 1)
time.sleep(1)
if changed_detected or not watch['last_checked']:
watch.save_history_text(contents=contents,
timestamp=timestamp,
timestamp=str(round(time.time())),
snapshot_id=update_obj.get('previous_md5', 'none'))
if update_handler.fetcher.content:
watch.save_last_fetched_html(contents=update_handler.fetcher.content, timestamp=timestamp)
# A change was detected
if changed_detected:
# Notifications should only trigger on the second time (first time, we gather the initial snapshot)
if watch.history_n >= 2:
logger.info(f"Change detected in UUID {uuid} - {watch['url']}")
if not watch.get('notification_muted'):
if not self.datastore.data['watching'][uuid].get('notification_muted'):
self.send_content_changed_notification(watch_uuid=uuid)
else:
logger.info(f"Change triggered in UUID {uuid} due to first history saving (no notifications sent) - {watch['url']}")
@@ -538,23 +511,29 @@ class update_worker(threading.Thread):
logger.critical(str(e))
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
if self.datastore.data['watching'].get(uuid):
# Always record that we atleast tried
count = self.datastore.data['watching'][uuid].get('check_count', 0) + 1
# Always record that we atleast tried
count = watch.get('check_count', 0) + 1
# Record the 'server' header reply, can be used for actions in the future like cloudflare/akamai workarounds
try:
server_header = update_handler.fetcher.headers.get('server', '').strip().lower()[:255]
self.datastore.update_watch(uuid=uuid,
update_obj={'remote_server_reply': server_header}
)
except Exception as e:
pass
# Record the 'server' header reply, can be used for actions in the future like cloudflare/akamai workarounds
try:
server_header = update_handler.fetcher.headers.get('server', '').strip().lower()[:255]
self.datastore.update_watch(uuid=uuid,
update_obj={'remote_server_reply': server_header}
)
except Exception as e:
pass
self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
'last_checked': round(time.time()),
'check_count': count
})
self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
'last_checked': round(time.time()),
'check_count': count
})
# Always save the screenshot if it's available
if update_handler.screenshot:
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=update_handler.screenshot)
if update_handler.xpath_data:
self.datastore.save_xpath_data(watch_uuid=uuid, data=update_handler.xpath_data)
self.current_uuid = None # Done

View File

@@ -68,10 +68,9 @@ services:
# If WEBDRIVER or PLAYWRIGHT are enabled, changedetection container depends on that
# and must wait before starting (substitute "browser-chrome" with "playwright-chrome" if last one is used)
# depends_on:
# playwright-chrome:
# condition: service_started
# depends_on:
# browser-chrome:
# condition: service_started
# Used for fetching pages via Playwright+Chrome where you need Javascript support.
# RECOMMENDED FOR FETCHING PAGES WITH CHROME

View File

@@ -1,7 +1,7 @@
# Used by Pyppeteer
pyee
eventlet>=0.36.1 # fixes SSL error on Python 3.12
eventlet==0.35.2 # related to dnspython fixes
feedgen~=0.9
flask-compress
# 0.6.3 included compatibility fix for werkzeug 3.x (2.x had deprecation of url handlers)
@@ -23,13 +23,13 @@ validators~=0.21
brotli~=1.0
requests[socks]
urllib3==1.26.19
urllib3==1.26.18
chardet>2.3.0
wtforms~=3.0
jsonpath-ng~=1.5.3
dnspython==2.6.1 # related to eventlet fixes
dnspython==2.6.1
# jq not available on Windows so must be installed manually
@@ -41,8 +41,10 @@ apprise~=1.8.0
# use v1.x due to https://github.com/eclipse/paho.mqtt.python/issues/814
paho-mqtt>=1.6.1,<2.0.0
# Requires extra wheel for rPi
cryptography~=42.0.8
# This mainly affects some ARM builds, which unlike the other builds ignores "ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1"
# so without this pinning, the newer versions on ARM will forcefully try to build rust, which results in "rust compiler not found"
# (introduced once apprise became a dep)
cryptography~=3.4
# Used for CSS filtering
beautifulsoup4
@@ -83,4 +85,4 @@ jsonschema==4.17.3
loguru
# Needed for > 3.10, https://github.com/microsoft/playwright-python/issues/2096
greenlet >= 3.0.3
greenlet >= 3.0.3