Mirror of https://github.com/dgtlmoon/changedetection.io.git
Synced 2025-11-10 11:36:55 +00:00

Compare commits: multiple-t ... 0.45.25 (21 commits)
| Author | SHA1 | Date |
|---|---|---|
|  | bed16009bb |  |
|  | faeed78ffb |  |
|  | 5d9081ccb2 |  |
|  | 2cf1829073 |  |
|  | 526551a205 |  |
|  | ba139e7f3f |  |
|  | 13e343f9da |  |
|  | 13be4623db |  |
|  | 3b19e3d2bf |  |
|  | ce42f8ea26 |  |
|  | 343e359b39 |  |
|  | ffd160ce0e |  |
|  | d31fc860cc |  |
|  | 90b357f457 |  |
|  | cc147be76e |  |
|  | 8ae5ed76ce |  |
|  | a9ed113369 |  |
|  | eacf920b9a |  |
|  | c9af9b6374 |  |
|  | 5e65fb606b |  |
|  | 434a1b242e |  |
4  .github/workflows/containers.yml  vendored

@@ -88,7 +88,7 @@ jobs:
       - name: Build and push :dev
         id: docker_build
         if: ${{ github.ref }} == "refs/heads/master"
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@v6
         with:
           context: ./
           file: ./Dockerfile
@@ -106,7 +106,7 @@ jobs:
       - name: Build and push :tag
         id: docker_build_tag_release
         if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@v6
         with:
           context: ./
           file: ./Dockerfile
4  .github/workflows/test-container-build.yml  vendored

@@ -51,7 +51,7 @@ jobs:
       # Check we can still build under alpine/musl
       - name: Test that the docker containers can build (musl via alpine check)
        id: docker_build_musl
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@v6
        with:
          context: ./
          file: ./.github/test/Dockerfile-alpine
@@ -59,7 +59,7 @@ jobs:

      - name: Test that the docker containers can build
        id: docker_build
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@v6
        # https://github.com/docker/build-push-action#customizing
        with:
          context: ./
@@ -3,9 +3,9 @@
 # @NOTE! I would love to move to 3.11 but it breaks the async handler in changedetectionio/content_fetchers/puppeteer.py
 # If you know how to fix it, please do! and test it for both 3.10 and 3.11

-ARG PYTHON_VERSION=3.10
+ARG PYTHON_VERSION=3.11

-FROM python:${PYTHON_VERSION}-slim-bookworm as builder
+FROM python:${PYTHON_VERSION}-slim-bookworm AS builder

 # See `cryptography` pin comment in requirements.txt
 ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1
@@ -26,7 +26,8 @@ WORKDIR /install

 COPY requirements.txt /requirements.txt

-RUN pip install --target=/dependencies -r /requirements.txt
+# --extra-index-url https://www.piwheels.org/simple is for cryptography module to be prebuilt (or rustc etc needs to be installed)
+RUN pip install --extra-index-url https://www.piwheels.org/simple --target=/dependencies -r /requirements.txt

 # Playwright is an alternative to Selenium
 # Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing
@@ -2,7 +2,7 @@

 # Read more https://github.com/dgtlmoon/changedetection.io/wiki

-__version__ = '0.45.23'
+__version__ = '0.45.25'

 from changedetectionio.strtobool import strtobool
 from json.decoder import JSONDecodeError
@@ -170,23 +170,33 @@ class WatchSingleHistory(Resource):
         curl http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091/history/1677092977 -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json"
         @apiName Get single snapshot content
         @apiGroup Watch History
+        @apiParam {String} [html] Optional Set to =1 to return the last HTML (only stores last 2 snapshots, use `latest` as timestamp)
         @apiSuccess (200) {String} OK
         @apiSuccess (404) {String} ERR Not found
         """
         watch = self.datastore.data['watching'].get(uuid)
         if not watch:
-            abort(404, message='No watch exists with the UUID of {}'.format(uuid))
+            abort(404, message=f"No watch exists with the UUID of {uuid}")

         if not len(watch.history):
-            abort(404, message='Watch found but no history exists for the UUID {}'.format(uuid))
+            abort(404, message=f"Watch found but no history exists for the UUID {uuid}")

         if timestamp == 'latest':
             timestamp = list(watch.history.keys())[-1]

-        content = watch.get_history_snapshot(timestamp)
+        if request.args.get('html'):
+            content = watch.get_fetched_html(timestamp)
+            if content:
+                response = make_response(content, 200)
+                response.mimetype = "text/html"
+            else:
+                response = make_response("No content found", 404)
+                response.mimetype = "text/plain"
+        else:
+            content = watch.get_history_snapshot(timestamp)
+            response = make_response(content, 200)
+            response.mimetype = "text/plain"

-        response = make_response(content, 200)
-        response.mimetype = "text/plain"
         return response
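A hedged client-side sketch of exercising the new parameter, reusing the placeholder host, UUID and API key from the docstring above (a running instance and a real key would be needed):

```python
# Fetch the latest snapshot as extracted text, then as raw HTML via the
# new ?html=1 parameter. Values below are the docstring's placeholders.
import requests

base = "http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091/history/latest"
headers = {"x-api-key": "813031b16330fe25e3780cf0325daa45"}

text_resp = requests.get(base, headers=headers)                         # text/plain
html_resp = requests.get(base, params={"html": "1"}, headers=headers)   # text/html, 404 if pruned

print(text_resp.status_code, html_resp.headers.get("Content-Type"))
```

Since only the last two HTML snapshots are kept (see `_prune_last_fetched_html_snapshots` later in this changeset), `latest` is the safe timestamp to request.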
@@ -187,8 +187,10 @@ def construct_blueprint(datastore: ChangeDetectionStore):
             u = browsersteps_sessions[browsersteps_session_id]['browserstepper'].page.url
             if is_last_step and u:
                 (screenshot, xpath_data) = browsersteps_sessions[browsersteps_session_id]['browserstepper'].request_visualselector_data()
-                datastore.save_screenshot(watch_uuid=uuid, screenshot=screenshot)
-                datastore.save_xpath_data(watch_uuid=uuid, data=xpath_data)
+                watch = datastore.data['watching'].get(uuid)
+                if watch:
+                    watch.save_screenshot(screenshot=screenshot)
+                    watch.save_xpath_data(data=xpath_data)

             # if not this_session.page:
             #     cleanup_playwright_session()
@@ -255,8 +255,8 @@ class browsersteps_live_ui(steppable_browser_interface):

     def get_current_state(self):
         """Return the screenshot and interactive elements mapping, generally always called after action_()"""
-        from pkg_resources import resource_string
-        xpath_element_js = resource_string(__name__, "../../content_fetchers/res/xpath_element_scraper.js").decode('utf-8')
+        import importlib.resources
+        xpath_element_js = importlib.resources.read_text("changedetectionio.content_fetchers.res", "xpath_element_scraper.js")
         now = time.time()
         self.page.wait_for_timeout(1 * 1000)
@@ -287,11 +287,9 @@ class browsersteps_live_ui(steppable_browser_interface):
         :param current_include_filters:
         :return:
         """
+        import importlib.resources
         self.page.evaluate("var include_filters=''")
-        from pkg_resources import resource_string
         # The code that scrapes elements and makes a list of elements/size/position to click on in the VisualSelector
-        xpath_element_js = resource_string(__name__, "../../content_fetchers/res/xpath_element_scraper.js").decode('utf-8')
+        xpath_element_js = importlib.resources.read_text("changedetectionio.content_fetchers.res", "xpath_element_scraper.js")
         from changedetectionio.content_fetchers import visualselector_xpath_selectors
         xpath_element_js = xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors)
         xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")
@@ -63,7 +63,7 @@ xpath://body/div/span[contains(@class, 'example-class')]",
 <ul>
     <li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required, <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li>
     {% if jq_support %}
-    <li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>.</li>
+    <li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>. Prefix <code>jqraw:</code> outputs the results as text instead of a JSON list.</li>
     {% else %}
     <li>jq support not installed</li>
     {% endif %}
@@ -64,10 +64,9 @@ class Fetcher():
     render_extract_delay = 0

     def __init__(self):
-        from pkg_resources import resource_string
-        # The code that scrapes elements and makes a list of elements/size/position to click on in the VisualSelector
-        self.xpath_element_js = resource_string(__name__, "res/xpath_element_scraper.js").decode('utf-8')
-        self.instock_data_js = resource_string(__name__, "res/stock-not-in-stock.js").decode('utf-8')
+        import importlib.resources
+        # The code that scrapes elements and makes a list of elements/size/position to click on in the VisualSelector
+        self.xpath_element_js = importlib.resources.read_text("changedetectionio.content_fetchers.res", 'xpath_element_scraper.js')
+        self.instock_data_js = importlib.resources.read_text("changedetectionio.content_fetchers.res", 'stock-not-in-stock.js')

     @abstractmethod
     def get_error(self):
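The same pkg_resources to importlib.resources swap appears in several files in this changeset. The key difference is that importlib.resources addresses bundled files by package name rather than by a path relative to `__file__`, which is why the new `res/__init__.py` below is needed to make that directory importable. A hedged sketch of the pattern, assuming the changedetectionio package is importable:

```python
# read_text() is the call the diff itself uses; it resolves the resource
# through the package system instead of filesystem-relative paths.
import importlib.resources

js = importlib.resources.read_text("changedetectionio.content_fetchers.res",
                                   "xpath_element_scraper.js")

# files() is the forward-compatible equivalent on Python 3.9+ (a hedged
# aside, not part of this changeset):
js = importlib.resources.files("changedetectionio.content_fetchers.res") \
        .joinpath("xpath_element_scraper.js").read_text()
```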
1  changedetectionio/content_fetchers/res/__init__.py  Normal file

@@ -0,0 +1 @@
+# resources for browser injection/scraping
@@ -30,14 +30,21 @@ function isItemInStock() {
         'dieser artikel ist bald wieder verfügbar',
         'dostępne wkrótce',
         'en rupture de stock',
         'ist derzeit nicht auf lager',
         'isn\'t in stock right now',
         'isnt in stock right now',
         'isn’t in stock right now',
         'item is no longer available',
         'let me know when it\'s available',
         'mail me when available',
         'message if back in stock',
         'nachricht bei',
         'nicht auf lager',
         'nicht lagernd',
         'nicht lieferbar',
         'nicht verfügbar',
         'nicht vorrätig',
         'nicht zur verfügung',
         'nie znaleziono produktów',
         'niet beschikbaar',
         'niet leverbaar',
         'niet op voorraad',
@@ -48,6 +55,7 @@ function isItemInStock() {
         'not currently available',
         'not in stock',
         'notify me when available',
         'notify me',
         'notify when available',
         'não estamos a aceitar encomendas',
         'out of stock',
@@ -62,12 +70,16 @@ function isItemInStock() {
         'this item is currently unavailable',
         'tickets unavailable',
         'tijdelijk uitverkocht',
         'unavailable nearby',
         'unavailable tickets',
         'vergriffen',
         'vorbestellen',
         'vorbestellung ist bald möglich',
         'we couldn\'t find any products that match',
         'we do not currently have an estimate of when this product will be back in stock.',
         'we don\'t know when or if this item will be back in stock.',
         'we were not able to find a match',
         'when this arrives in stock',
         'zur zeit nicht an lager',
         '品切れ',
         '已售',
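These hunks only grow the phrase list; the detection idea stays the same. A hedged Python sketch of what the JavaScript above boils down to (the real logic also checks element visibility, omitted here, and the sample page text is invented):

```python
# Lower-case the visible page text and look for any known out-of-stock
# phrase. Sample phrases are taken from the list above.
OUT_OF_STOCK_PHRASES = ('not in stock', 'niet op voorraad', 'vergriffen', '品切れ')

def looks_out_of_stock(page_text: str) -> bool:
    text = page_text.lower()
    return any(phrase in text for phrase in OUT_OF_STOCK_PHRASES)

assert looks_out_of_stock("Sorry, this item is NOT IN STOCK right now")
assert not looks_out_of_stock("Add to cart")
```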
@@ -679,7 +679,10 @@ def changedetection_app(config=None, datastore_o=None):

         if request.method == 'POST' and form.validate():

-            extra_update_obj = {}
+            extra_update_obj = {
+                'consecutive_filter_failures': 0,
+                'last_error' : False
+            }

             if request.args.get('unpause_on_save'):
                 extra_update_obj['paused'] = False
@@ -718,7 +721,7 @@ def changedetection_app(config=None, datastore_o=None):
             datastore.data['watching'][uuid].update(extra_update_obj)

             if request.args.get('unpause_on_save'):
-                flash("Updated watch - unpaused!.")
+                flash("Updated watch - unpaused!")
             else:
                 flash("Updated watch.")
@@ -3,8 +3,6 @@ from bs4 import BeautifulSoup
 from inscriptis import get_text
 from jsonpath_ng.ext import parse
-from typing import List
 from inscriptis.css_profiles import CSS_PROFILES, HtmlElement
 from inscriptis.html_properties import Display
 from inscriptis.model.config import ParserConfig
-from xml.sax.saxutils import escape as xml_escape
 import json
@@ -196,12 +194,12 @@ def extract_element(find='title', html_content=''):

 #
 def _parse_json(json_data, json_filter):
-    if 'json:' in json_filter:
+    if json_filter.startswith("json:"):
         jsonpath_expression = parse(json_filter.replace('json:', ''))
         match = jsonpath_expression.find(json_data)
         return _get_stripped_text_from_json_match(match)

-    if 'jq:' in json_filter:
+    if json_filter.startswith("jq:") or json_filter.startswith("jqraw:"):

         try:
             import jq
@@ -209,10 +207,15 @@ def _parse_json(json_data, json_filter):
             # `jq` requires full compilation in windows and so isn't generally available
             raise Exception("jq not support not found")

-        jq_expression = jq.compile(json_filter.replace('jq:', ''))
-        match = jq_expression.input(json_data).all()
+        if json_filter.startswith("jq:"):
+            jq_expression = jq.compile(json_filter.removeprefix("jq:"))
+            match = jq_expression.input(json_data).all()
+            return _get_stripped_text_from_json_match(match)

-        return _get_stripped_text_from_json_match(match)
+        if json_filter.startswith("jqraw:"):
+            jq_expression = jq.compile(json_filter.removeprefix("jqraw:"))
+            match = jq_expression.input(json_data).all()
+            return '\n'.join(str(item) for item in match)

 def _get_stripped_text_from_json_match(match):
     s = []
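A hedged sketch of the practical difference between the two prefixes (requires the optional `jq` module; the sample document is invented, and the quoting behaviour matches the unit tests later in this changeset, where `jq:` yields `"AUD"` and `jqraw:` yields `AUD`):

```python
# jq: results pass through the JSON stripping helper, so strings stay
# JSON-quoted; jqraw: stringifies the matched items and joins them as
# plain text. The .input(...).all() calls mirror the diff above.
import jq
import json

doc = {"offers": {"priceCurrency": "AUD"}}
match = jq.compile(".offers.priceCurrency").input(doc).all()   # ['AUD']

print(json.dumps(match[0]))                                    # "AUD"  (jq: style)
print('\n'.join(str(item) for item in match))                  # AUD    (jqraw: style)
```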
@@ -5,6 +5,7 @@ from changedetectionio.notification import (
     default_notification_title,
 )

 # Equal to or greater than this number of FilterNotFoundInResponse exceptions will trigger a filter-not-found notification
 _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT = 6
 DEFAULT_SETTINGS_HEADERS_USERAGENT='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'
@@ -328,14 +328,9 @@ class model(dict):
     def save_history_text(self, contents, timestamp, snapshot_id):
         import brotli

         self.ensure_data_dir_exists()
         logger.trace(f"{self.get('uuid')} - Updating history.txt with timestamp {timestamp}")

         # Small hack so that we sleep just enough to allow 1 second between history snapshots
         # this is because history.txt indexes/keys snapshots by epoch seconds and we dont want dupe keys
         if self.__newest_history_key and int(timestamp) == int(self.__newest_history_key):
             logger.warning(f"Timestamp {timestamp} already exists, waiting 1 seconds so we have a unique key in history.txt")
             timestamp = str(int(timestamp) + 1)
             time.sleep(1)
         self.ensure_data_dir_exists()

         threshold = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024))
         skip_brotli = strtobool(os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False'))
@@ -528,8 +523,42 @@ class model(dict):
             # None is set
             return False

+    def save_error_text(self, contents):
+        self.ensure_data_dir_exists()
+        target_path = os.path.join(self.watch_data_dir, "last-error.txt")
+        with open(target_path, 'w') as f:
+            f.write(contents)

-    def get_last_fetched_before_filters(self):
+    def save_xpath_data(self, data, as_error=False):
+        import json
+
+        if as_error:
+            target_path = os.path.join(self.watch_data_dir, "elements-error.json")
+        else:
+            target_path = os.path.join(self.watch_data_dir, "elements.json")
+
+        self.ensure_data_dir_exists()
+
+        with open(target_path, 'w') as f:
+            f.write(json.dumps(data))
+            f.close()
+
+    # Save as PNG, PNG is larger but better for doing visual diff in the future
+    def save_screenshot(self, screenshot: bytes, as_error=False):
+
+        if as_error:
+            target_path = os.path.join(self.watch_data_dir, "last-error-screenshot.png")
+        else:
+            target_path = os.path.join(self.watch_data_dir, "last-screenshot.png")
+
+        self.ensure_data_dir_exists()
+
+        with open(target_path, 'wb') as f:
+            f.write(screenshot)
+            f.close()
+
+    def get_last_fetched_text_before_filters(self):
         import brotli
         filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
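These helpers mirror the ones removed from ChangeDetectionStore further below: persistence moves from the central store onto the per-watch model, so callers holding a watch no longer need a datastore reference plus a uuid. A hedged, self-contained sketch of that refactor pattern (the class and paths here are illustrative, not the project's real ones):

```python
# A method on the watch object writes into its own data directory.
import json
import os
import tempfile

class WatchSketch(dict):
    def __init__(self, data_dir):
        super().__init__()
        self.watch_data_dir = data_dir

    def ensure_data_dir_exists(self):
        os.makedirs(self.watch_data_dir, exist_ok=True)

    def save_xpath_data(self, data, as_error=False):
        fname = "elements-error.json" if as_error else "elements.json"
        self.ensure_data_dir_exists()
        with open(os.path.join(self.watch_data_dir, fname), 'w') as f:
            f.write(json.dumps(data))

w = WatchSketch(tempfile.mkdtemp())
w.save_xpath_data({"size_pos": []})   # writes elements.json in the watch dir
```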
@@ -544,12 +573,56 @@ class model(dict):
         with open(filepath, 'rb') as f:
             return(brotli.decompress(f.read()).decode('utf-8'))

-    def save_last_fetched_before_filters(self, contents):
+    def save_last_text_fetched_before_filters(self, contents):
         import brotli
         filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
         with open(filepath, 'wb') as f:
             f.write(brotli.compress(contents, mode=brotli.MODE_TEXT))

+    def save_last_fetched_html(self, timestamp, contents):
+        import brotli
+
+        self.ensure_data_dir_exists()
+        snapshot_fname = f"{timestamp}.html.br"
+        filepath = os.path.join(self.watch_data_dir, snapshot_fname)
+
+        with open(filepath, 'wb') as f:
+            contents = contents.encode('utf-8') if isinstance(contents, str) else contents
+            try:
+                f.write(brotli.compress(contents))
+            except Exception as e:
+                logger.warning(f"{self.get('uuid')} - Unable to compress snapshot, saving as raw data to {filepath}")
+                logger.warning(e)
+                f.write(contents)
+
+        self._prune_last_fetched_html_snapshots()
+
+    def get_fetched_html(self, timestamp):
+        import brotli
+
+        snapshot_fname = f"{timestamp}.html.br"
+        filepath = os.path.join(self.watch_data_dir, snapshot_fname)
+        if os.path.isfile(filepath):
+            with open(filepath, 'rb') as f:
+                return (brotli.decompress(f.read()).decode('utf-8'))
+
+        return False
+
+    def _prune_last_fetched_html_snapshots(self):
+
+        dates = list(self.history.keys())
+        dates.reverse()
+
+        for index, timestamp in enumerate(dates):
+            snapshot_fname = f"{timestamp}.html.br"
+            filepath = os.path.join(self.watch_data_dir, snapshot_fname)
+
+            # Keep only the first 2
+            if index > 1 and os.path.isfile(filepath):
+                os.remove(filepath)

     @property
     def get_browsersteps_available_screenshots(self):
         "For knowing which screenshots are available to show the user in BrowserSteps UI"
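The `{timestamp}.html.br` format above is plain Brotli over UTF-8 bytes, and the pruning keeps only the two newest snapshots, which is why the API docstring earlier says only the last two HTML snapshots are available. A hedged round-trip sketch (the path and timestamp are invented):

```python
import brotli
import os
import tempfile

html = "<html><body>hello</body></html>"
path = os.path.join(tempfile.mkdtemp(), "1677092977.html.br")

with open(path, "wb") as f:
    f.write(brotli.compress(html.encode("utf-8")))

with open(path, "rb") as f:
    assert brotli.decompress(f.read()).decode("utf-8") == html
```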
@@ -1,5 +1,6 @@
 from abc import abstractmethod
 from changedetectionio.strtobool import strtobool
+from changedetectionio.model import Watch
 from copy import deepcopy
 from loguru import logger
 import hashlib
@@ -138,7 +139,7 @@ class difference_detection_processor():
     # After init, call run_changedetection() which will do the actual change-detection

     @abstractmethod
-    def run_changedetection(self, uuid, skip_when_checksum_same=True):
+    def run_changedetection(self, watch: Watch, skip_when_checksum_same=True):
         update_obj = {'last_notification_error': False, 'last_error': False}
         some_data = 'xxxxx'
         update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
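Under the new signature every processor receives a deep-copied watch object from the worker instead of looking itself up by uuid. A hedged sketch of a minimal subclass after this change, assuming the module context above (only the method contract and the three-value return shape are taken from the diff; the body is illustrative):

```python
class sketch_processor(difference_detection_processor):
    def run_changedetection(self, watch, skip_when_checksum_same=True):
        if not watch:
            raise Exception("Watch no longer exists.")
        update_obj = {'last_notification_error': False, 'last_error': False}
        # Read directly from the passed-in copy, no datastore lookup by uuid
        logger.debug(f"Checking {watch.get('uuid')} ({watch.get('url')})")
        changed_detected = False
        contents = b''
        return changed_detected, update_obj, contents
```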
@@ -1,6 +1,5 @@

 from . import difference_detection_processor
-from copy import deepcopy
 from loguru import logger
 import hashlib
 import urllib3
@@ -20,10 +19,7 @@ class perform_site_check(difference_detection_processor):
     screenshot = None
     xpath_data = None

-    def run_changedetection(self, uuid, skip_when_checksum_same=True):
-
-        # DeepCopy so we can be sure we don't accidently change anything by reference
-        watch = deepcopy(self.datastore.data['watching'].get(uuid))
+    def run_changedetection(self, watch, skip_when_checksum_same=True):

         if not watch:
             raise Exception("Watch no longer exists.")
@@ -44,13 +40,13 @@ class perform_site_check(difference_detection_processor):
             fetched_md5 = hashlib.md5(self.fetcher.instock_data.encode('utf-8')).hexdigest()
             # 'Possibly in stock' comes from stock-not-in-stock.js when no string found above the fold.
             update_obj["in_stock"] = True if self.fetcher.instock_data == 'Possibly in stock' else False
-            logger.debug(f"Watch UUID {uuid} restock check returned '{self.fetcher.instock_data}' from JS scraper.")
+            logger.debug(f"Watch UUID {watch.get('uuid')} restock check returned '{self.fetcher.instock_data}' from JS scraper.")
         else:
             raise UnableToExtractRestockData(status_code=self.fetcher.status_code)

         # The main thing that all this at the moment comes down to :)
         changed_detected = False
-        logger.debug(f"Watch UUID {uuid} restock check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")
+        logger.debug(f"Watch UUID {watch.get('uuid')} restock check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")

         if watch.get('previous_md5') and watch.get('previous_md5') != fetched_md5:
             # Yes if we only care about it going to instock, AND we are in stock
@@ -10,18 +10,17 @@ from . import difference_detection_processor
 from ..html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text
 from changedetectionio import html_tools, content_fetchers
 from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
 import changedetectionio.content_fetchers
-from copy import deepcopy
 from loguru import logger

 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

 name = 'Webpage Text/HTML, JSON and PDF changes'
 description = 'Detects all text changes where possible'

-json_filter_prefixes = ['json:', 'jq:']
+json_filter_prefixes = ['json:', 'jq:', 'jqraw:']

 class FilterNotFoundInResponse(ValueError):
-    def __init__(self, msg):
+    def __init__(self, msg, screenshot=None):
+        self.screenshot = screenshot
         ValueError.__init__(self, msg)

@@ -34,14 +33,12 @@ class PDFToHTMLToolNotFound(ValueError):
 # (set_proxy_from_list)
 class perform_site_check(difference_detection_processor):

-    def run_changedetection(self, uuid, skip_when_checksum_same=True):
+    def run_changedetection(self, watch, skip_when_checksum_same=True):
         changed_detected = False
         html_content = ""
         screenshot = False # as bytes
         stripped_text_from_html = ""

-        # DeepCopy so we can be sure we don't accidently change anything by reference
-        watch = deepcopy(self.datastore.data['watching'].get(uuid))
         if not watch:
             raise Exception("Watch no longer exists.")
@@ -116,12 +113,12 @@ class perform_site_check(difference_detection_processor):
         # Better would be if Watch.model could access the global data also
         # and then use getattr https://docs.python.org/3/reference/datamodel.html#object.__getitem__
         # https://realpython.com/inherit-python-dict/ instead of doing it procedurely
-        include_filters_from_tags = self.datastore.get_tag_overrides_for_watch(uuid=uuid, attr='include_filters')
+        include_filters_from_tags = self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='include_filters')

         # 1845 - remove duplicated filters in both group and watch include filter
         include_filters_rule = list(dict.fromkeys(watch.get('include_filters', []) + include_filters_from_tags))

-        subtractive_selectors = [*self.datastore.get_tag_overrides_for_watch(uuid=uuid, attr='subtractive_selectors'),
+        subtractive_selectors = [*self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='subtractive_selectors'),
                                  *watch.get("subtractive_selectors", []),
                                  *self.datastore.data["settings"]["application"].get("global_subtractive_selectors", [])
                                  ]
@@ -188,7 +185,7 @@ class perform_site_check(difference_detection_processor):
                                                              append_pretty_line_formatting=not watch.is_source_type_url)

             if not html_content.strip():
-                raise FilterNotFoundInResponse(include_filters_rule)
+                raise FilterNotFoundInResponse(msg=include_filters_rule, screenshot=self.fetcher.screenshot)

         if has_subtractive_selectors:
             html_content = html_tools.element_removal(subtractive_selectors, html_content)
@@ -222,7 +219,7 @@ class perform_site_check(difference_detection_processor):
             from .. import diff
             # needs to not include (added) etc or it may get used twice
             # Replace the processed text with the preferred result
-            rendered_diff = diff.render_diff(previous_version_file_contents=watch.get_last_fetched_before_filters(),
+            rendered_diff = diff.render_diff(previous_version_file_contents=watch.get_last_fetched_text_before_filters(),
                                              newest_version_file_contents=stripped_text_from_html,
                                              include_equal=False, # not the same lines
                                              include_added=watch.get('filter_text_added', True),
@@ -231,7 +228,7 @@ class perform_site_check(difference_detection_processor):
                                              line_feed_sep="\n",
                                              include_change_type_prefix=False)

-            watch.save_last_fetched_before_filters(text_content_before_ignored_filter)
+            watch.save_last_text_fetched_before_filters(text_content_before_ignored_filter)

             if not rendered_diff and stripped_text_from_html:
                 # We had some content, but no differences were found
@@ -344,17 +341,17 @@ class perform_site_check(difference_detection_processor):
         if not watch['title'] or not len(watch['title']):
             update_obj['title'] = html_tools.extract_element(find='title', html_content=self.fetcher.content)

-        logger.debug(f"Watch UUID {uuid} content check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")
+        logger.debug(f"Watch UUID {watch.get('uuid')} content check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")

         if changed_detected:
             if watch.get('check_unique_lines', False):
                 has_unique_lines = watch.lines_contain_something_unique_compared_to_history(lines=stripped_text_from_html.splitlines())
                 # One or more lines? unsure?
                 if not has_unique_lines:
-                    logger.debug(f"check_unique_lines: UUID {uuid} didnt have anything new setting change_detected=False")
+                    logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} didnt have anything new setting change_detected=False")
                     changed_detected = False
                 else:
-                    logger.debug(f"check_unique_lines: UUID {uuid} had unique content")
+                    logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} had unique content")

         # Always record the new checksum
         update_obj["previous_md5"] = fetched_md5
@@ -1,14 +1,5 @@
 $(document).ready(function () {

-    // duplicate
-    var csrftoken = $('input[name=csrf_token]').val();
-    $.ajaxSetup({
-        beforeSend: function (xhr, settings) {
-            if (!/^(GET|HEAD|OPTIONS|TRACE)$/i.test(settings.type) && !this.crossDomain) {
-                xhr.setRequestHeader("X-CSRFToken", csrftoken)
-            }
-        }
-    })
     var browsersteps_session_id;
     var browser_interface_seconds_remaining = 0;
     var apply_buttons_disabled = false;
10  changedetectionio/static/js/csrf.js  Normal file

@@ -0,0 +1,10 @@
+$(document).ready(function () {
+    $.ajaxSetup({
+        beforeSend: function (xhr, settings) {
+            if (!/^(GET|HEAD|OPTIONS|TRACE)$/i.test(settings.type) && !this.crossDomain) {
+                xhr.setRequestHeader("X-CSRFToken", csrftoken)
+            }
+        }
+    })
+});
@@ -1,13 +1,4 @@
 $(document).ready(function () {
-    var csrftoken = $('input[name=csrf_token]').val();
-    $.ajaxSetup({
-        beforeSend: function (xhr, settings) {
-            if (!/^(GET|HEAD|OPTIONS|TRACE)$/i.test(settings.type) && !this.crossDomain) {
-                xhr.setRequestHeader("X-CSRFToken", csrftoken)
-            }
-        }
-    })
-
     $('.needs-localtime').each(function () {
         for (var option of this.options) {
             var dateObject = new Date(option.value * 1000);
@@ -48,6 +39,12 @@ $(document).ready(function () {
         $("#highlightSnippet").remove();
     }

+    // Listen for Escape key press
+    window.addEventListener('keydown', function (e) {
+        if (e.key === 'Escape') {
+            clean();
+        }
+    }, false);

     function dragTextHandler(event) {
         console.log('mouseupped');
@@ -13,16 +13,6 @@ $(document).ready(function() {
     $('#send-test-notification').click(function (e) {
         e.preventDefault();

-        // this can be global
-        var csrftoken = $('input[name=csrf_token]').val();
-        $.ajaxSetup({
-            beforeSend: function(xhr, settings) {
-                if (!/^(GET|HEAD|OPTIONS|TRACE)$/i.test(settings.type) && !this.crossDomain) {
-                    xhr.setRequestHeader("X-CSRFToken", csrftoken)
-                }
-            }
-        })

         data = {
             notification_body: $('#notification_body').val(),
             notification_format: $('#notification_format').val(),
@@ -671,14 +671,25 @@ footer {
   and also iPads specifically.
   */
   .watch-table {
+    /* make headings work on mobile */
+    thead {
+      display: block;
+      tr {
+        th {
+          display: inline-block;
+        }
+      }
+      .empty-cell {
+        display: none;
+      }
+    }

     /* Force table to not be like tables anymore */
-    thead,
-    tbody,
-    th,
-    td,
-    tr {
-      display: block;
+    tbody {
+      td,
+      tr {
+        display: block;
+      }
     }

     .last-checked {
@@ -702,13 +713,6 @@ footer {
       display: inline-block;
     }

-    /* Hide table headers (but not display: none;, for accessibility) */
-    thead tr {
-      position: absolute;
-      top: -9999px;
-      left: -9999px;
-    }

     .pure-table td,
     .pure-table th {
       border: none;
@@ -753,6 +757,7 @@ footer {
   thead {
     background-color: var(--color-background-table-thead);
     color: var(--color-text);
+    border-bottom: 1px solid var(--color-background-table-thead);
   }

   td,
@@ -863,14 +863,17 @@ footer {
   and also iPads specifically.
   */
   .watch-table {
     /* make headings work on mobile */
     /* Force table to not be like tables anymore */
     /* Force table to not be like tables anymore */
     /* Hide table headers (but not display: none;, for accessibility) */ }
   .watch-table thead,
   .watch-table tbody,
   .watch-table th,
   .watch-table td,
   .watch-table tr {
     /* Force table to not be like tables anymore */ }
   .watch-table thead {
     display: block; }
   .watch-table thead tr th {
     display: inline-block; }
   .watch-table thead .empty-cell {
     display: none; }
   .watch-table tbody td,
   .watch-table tbody tr {
     display: block; }
   .watch-table .last-checked > span {
     vertical-align: middle; }
@@ -882,10 +885,6 @@ footer {
     content: "Last Changed "; }
   .watch-table td.inline {
     display: inline-block; }
-  .watch-table thead tr {
-    position: absolute;
-    top: -9999px;
-    left: -9999px; }
   .watch-table .pure-table td,
   .watch-table .pure-table th {
     border: none; }
@@ -912,7 +911,8 @@ footer {
     border-color: var(--color-border-table-cell); }
   .pure-table thead {
     background-color: var(--color-background-table-thead);
-    color: var(--color-text); }
+    color: var(--color-text);
+    border-bottom: 1px solid var(--color-background-table-thead); }
   .pure-table td,
   .pure-table th {
     border-left-color: var(--color-border-table-cell); }
@@ -163,7 +163,6 @@ class ChangeDetectionStore:
                     del (update_obj[dict_key])

             self.__data['watching'][uuid].update(update_obj)
-
             self.needs_write = True

     @property
@@ -376,46 +375,6 @@ class ChangeDetectionStore:

         return False

-    # Save as PNG, PNG is larger but better for doing visual diff in the future
-    def save_screenshot(self, watch_uuid, screenshot: bytes, as_error=False):
-        if not self.data['watching'].get(watch_uuid):
-            return
-
-        if as_error:
-            target_path = os.path.join(self.datastore_path, watch_uuid, "last-error-screenshot.png")
-        else:
-            target_path = os.path.join(self.datastore_path, watch_uuid, "last-screenshot.png")
-
-        self.data['watching'][watch_uuid].ensure_data_dir_exists()
-
-        with open(target_path, 'wb') as f:
-            f.write(screenshot)
-            f.close()
-
-    def save_error_text(self, watch_uuid, contents):
-        if not self.data['watching'].get(watch_uuid):
-            return
-
-        self.data['watching'][watch_uuid].ensure_data_dir_exists()
-        target_path = os.path.join(self.datastore_path, watch_uuid, "last-error.txt")
-        with open(target_path, 'w') as f:
-            f.write(contents)
-
-    def save_xpath_data(self, watch_uuid, data, as_error=False):
-
-        if not self.data['watching'].get(watch_uuid):
-            return
-        if as_error:
-            target_path = os.path.join(self.datastore_path, watch_uuid, "elements-error.json")
-        else:
-            target_path = os.path.join(self.datastore_path, watch_uuid, "elements.json")
-        self.data['watching'][watch_uuid].ensure_data_dir_exists()
-        with open(target_path, 'w') as f:
-            f.write(json.dumps(data))
-            f.close()

     def sync_to_json(self):
         logger.info("Saving JSON..")
         try:
@@ -26,7 +26,11 @@
     <meta name="msapplication-TileColor" content="#da532c">
     <meta name="msapplication-config" content="favicons/browserconfig.xml">
     <meta name="theme-color" content="#ffffff">
+    <script>
+        const csrftoken="{{ csrf_token() }}";
+    </script>
     <script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
+    <script src="{{url_for('static_content', group='js', filename='csrf.js')}}" defer></script>
 </head>

 <body>
@@ -292,7 +292,7 @@ xpath://body/div/span[contains(@class, 'example-class')]",
 <ul>
     <li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required, <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li>
     {% if jq_support %}
-    <li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>.</li>
+    <li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>. Prefix <code>jqraw:</code> outputs the results as text instead of a JSON list.</li>
     {% else %}
     <li>jq support not installed</li>
     {% endif %}
@@ -68,11 +68,11 @@
 {% set link_order = "desc" if sort_order == 'asc' else "asc" %}
 {% set arrow_span = "" %}
 <th><input style="vertical-align: middle" type="checkbox" id="check-all" > <a class="{{ 'active '+link_order if sort_attribute == 'date_created' else 'inactive' }}" href="{{url_for('index', sort='date_created', order=link_order, tag=active_tag_uuid)}}"># <span class='arrow {{link_order}}'></span></a></th>
-<th></th>
+<th class="empty-cell"></th>
 <th><a class="{{ 'active '+link_order if sort_attribute == 'label' else 'inactive' }}" href="{{url_for('index', sort='label', order=link_order, tag=active_tag_uuid)}}">Website <span class='arrow {{link_order}}'></span></a></th>
 <th><a class="{{ 'active '+link_order if sort_attribute == 'last_checked' else 'inactive' }}" href="{{url_for('index', sort='last_checked', order=link_order, tag=active_tag_uuid)}}">Last Checked <span class='arrow {{link_order}}'></span></a></th>
 <th><a class="{{ 'active '+link_order if sort_attribute == 'last_changed' else 'inactive' }}" href="{{url_for('index', sort='last_changed', order=link_order, tag=active_tag_uuid)}}">Last Changed <span class='arrow {{link_order}}'></span></a></th>
-<th></th>
+<th class="empty-cell"></th>
 </tr>
 </thead>
 <tbody>
@@ -149,6 +149,15 @@ def test_api_simple(client, live_server):
         headers={'x-api-key': api_key},
     )
     assert b'which has this one new line' in res.data
+    assert b'<div id' not in res.data
+
+    # Fetch the HTML of the latest one
+    res = client.get(
+        url_for("watchsinglehistory", uuid=watch_uuid, timestamp='latest')+"?html=1",
+        headers={'x-api-key': api_key},
+    )
+    assert b'which has this one new line' in res.data
+    assert b'<div id' in res.data

     # Fetch the whole watch
     res = client.get(
@@ -2,13 +2,12 @@

 import time
 from flask import url_for
-from . util import live_server_setup
+from .util import live_server_setup, wait_for_all_checks


 def test_basic_auth(client, live_server):

     live_server_setup(live_server)
     # Give the endpoint time to spin up
     time.sleep(1)

     # Add our URL to the import page
     test_url = url_for('test_basicauth_method', _external=True).replace("//","//myuser:mypass@")
@@ -19,8 +18,8 @@ def test_basic_auth(client, live_server):
         follow_redirects=True
     )
     assert b"1 Imported" in res.data
-    time.sleep(1)
+    wait_for_all_checks(client)

     # Check form validation
     res = client.post(
         url_for("edit_page", uuid="first"),
@@ -29,7 +28,7 @@ def test_basic_auth(client, live_server):
     )
     assert b"Updated watch." in res.data

-    time.sleep(1)
+    wait_for_all_checks(client)
     res = client.get(
         url_for("preview_page", uuid="first"),
         follow_redirects=True
@@ -100,7 +100,7 @@ def test_check_ldjson_price_autodetect(client, live_server):

     # Accept it
     uuid = extract_UUID_from_client(client)
-    time.sleep(1)
     client.get(url_for('price_data_follower.accept', uuid=uuid, follow_redirects=True))
+    wait_for_all_checks(client)
@@ -62,9 +62,6 @@ def test_check_basic_change_detection_functionality(client, live_server):
     # Make a change
     set_modified_response()

-    res = urlopen(url_for('test_endpoint', _external=True))
-    assert b'which has this one new line' in res.read()
-
     # Force recheck
     res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
     assert b'1 watches queued for rechecking.' in res.data
@@ -3,7 +3,7 @@

 import time
 from flask import url_for
-from .util import live_server_setup
+from .util import live_server_setup, wait_for_all_checks
 import pytest

@@ -27,9 +27,6 @@ def set_html_response():
 def test_check_encoding_detection(client, live_server):
     set_html_response()

-    # Give the endpoint time to spin up
-    time.sleep(1)
-
     # Add our URL to the import page
     test_url = url_for('test_endpoint', content_type="text/html", _external=True)
     client.post(
@@ -39,7 +36,7 @@ def test_check_encoding_detection(client, live_server):
     )

     # Give the thread time to pick it up
-    time.sleep(2)
+    wait_for_all_checks(client)

     res = client.get(
         url_for("preview_page", uuid="first"),
@@ -56,9 +53,6 @@ def test_check_encoding_detection(client, live_server):
 def test_check_encoding_detection_missing_content_type_header(client, live_server):
     set_html_response()

-    # Give the endpoint time to spin up
-    time.sleep(1)
-
     # Add our URL to the import page
     test_url = url_for('test_endpoint', _external=True)
     client.post(
@@ -67,8 +61,7 @@ def test_check_encoding_detection_missing_content_type_header(client, live_serve
         follow_redirects=True
     )

-    # Give the thread time to pick it up
-    time.sleep(2)
+    wait_for_all_checks(client)

     res = client.get(
         url_for("preview_page", uuid="first"),
@@ -29,6 +29,7 @@ def test_check_extract_text_from_diff(client, live_server):
     # Load in 5 different numbers/changes
     last_date=""
     for n in range(5):
+        time.sleep(1)
         # Give the thread time to pick it up
         print("Bumping snapshot and checking.. ", n)
         last_date = str(time.time())
@@ -21,10 +21,11 @@ def set_response_with_filter():
         f.write(test_return_data)
     return None

-def run_filter_test(client, content_filter):
+def run_filter_test(client, live_server, content_filter):

     # Response WITHOUT the filter ID element
     set_original_response()

-    # Give the endpoint time to spin up
-    time.sleep(1)
+    # cleanup for the next
+    client.get(
+        url_for("form_delete", uuid="all"),
@@ -79,6 +80,7 @@ def run_filter_test(client, content_filter):
         "include_filters": content_filter,
         "fetch_backend": "html_requests"})

+    # A POST here will also reset the filter failure counter (filter_failure_notification_threshold_attempts)
     res = client.post(
         url_for("edit_page", uuid="first"),
         data=notification_form_data,
@@ -91,20 +93,21 @@ def run_filter_test(client, content_filter):
     # Now the notification should not exist, because we didnt reach the threshold
     assert not os.path.isfile("test-datastore/notification.txt")

-    # -2 because we would have checked twice above (on adding and on edit)
+    # recheck it up to just before the threshold, including the fact that in the previous POST it would have rechecked (and incremented)
     for i in range(0, App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT-2):
-        res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
+        client.get(url_for("form_watch_checknow"), follow_redirects=True)
         wait_for_all_checks(client)
-        assert not os.path.isfile("test-datastore/notification.txt"), f"test-datastore/notification.txt should not exist - Attempt {i}"
+        time.sleep(2) # delay for apprise to fire
+        assert not os.path.isfile("test-datastore/notification.txt"), f"test-datastore/notification.txt should not exist - Attempt {i} when threshold is {App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT}"

     # We should see something in the frontend
     res = client.get(url_for("index"))
     assert b'Warning, no filters were found' in res.data

-    # One more check should trigger it (see -2 above)
-    client.get(url_for("form_watch_checknow"), follow_redirects=True)
-    wait_for_all_checks(client)
+    # One more check should trigger the _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT threshold
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    wait_for_all_checks(client)
+    time.sleep(2) # delay for apprise to fire
     # Now it should exist and contain our "filter not found" alert
     assert os.path.isfile("test-datastore/notification.txt")
@@ -149,13 +152,9 @@ def test_setup(live_server):
     live_server_setup(live_server)

 def test_check_include_filters_failure_notification(client, live_server):
     set_original_response()
     wait_for_all_checks(client)
-    run_filter_test(client, '#nope-doesnt-exist')
+    run_filter_test(client, live_server, '#nope-doesnt-exist')

 def test_check_xpath_filter_failure_notification(client, live_server):
     set_original_response()
     time.sleep(1)
-    run_filter_test(client, '//*[@id="nope-doesnt-exist"]')
+    run_filter_test(client, live_server, '//*[@id="nope-doesnt-exist"]')

 # Test that notification is never sent
@@ -5,15 +5,13 @@ import os
 import json
 import logging
 from flask import url_for
-from .util import live_server_setup
+from .util import live_server_setup, wait_for_all_checks
 from urllib.parse import urlparse, parse_qs

 def test_consistent_history(client, live_server):
     live_server_setup(live_server)

-    # Give the endpoint time to spin up
-    time.sleep(1)
-    r = range(1, 50)
+    r = range(1, 30)

     for one in r:
         test_url = url_for('test_endpoint', content_type="text/html", content=str(one), _external=True)
@@ -25,15 +23,8 @@ def test_consistent_history(client, live_server):

     assert b"1 Imported" in res.data

-    time.sleep(3)
-    while True:
-        res = client.get(url_for("index"))
-        logging.debug("Waiting for 'Checking now' to go away..")
-        if b'Checking now' not in res.data:
-            break
-        time.sleep(0.5)
+    wait_for_all_checks(client)

-    time.sleep(3)
     # Essentially just triggers the DB write/update
     res = client.post(
         url_for("settings_page"),
@@ -44,8 +35,9 @@ def test_consistent_history(client, live_server):
     )
     assert b"Settings updated." in res.data

     # Give it time to write it out
-    time.sleep(3)
+    time.sleep(2)

     json_db_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, 'url-watches.json')

     json_obj = None
@@ -58,7 +50,7 @@ def test_consistent_history(client, live_server):
     # each one should have a history.txt containing just one line
     for w in json_obj['watching'].keys():
         history_txt_index_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, w, 'history.txt')
-        assert os.path.isfile(history_txt_index_file), "History.txt should exist where I expect it - {}".format(history_txt_index_file)
+        assert os.path.isfile(history_txt_index_file), f"History.txt should exist where I expect it at {history_txt_index_file}"

     # Same like in model.Watch
     with open(history_txt_index_file, "r") as f:
@@ -70,15 +62,15 @@ def test_consistent_history(client, live_server):
                                               w))
     # Find the snapshot one
     for fname in files_in_watch_dir:
-        if fname != 'history.txt':
+        if fname != 'history.txt' and 'html' not in fname:
             # contents should match what we requested as content returned from the test url
             with open(os.path.join(live_server.app.config['DATASTORE'].datastore_path, w, fname), 'r') as snapshot_f:
                 contents = snapshot_f.read()
                 watch_url = json_obj['watching'][w]['url']
                 u = urlparse(watch_url)
                 q = parse_qs(u[4])
-                assert q['content'][0] == contents.strip(), "Snapshot file {} should contain {}".format(fname, q['content'][0])
+                assert q['content'][0] == contents.strip(), f"Snapshot file {fname} should contain {q['content'][0]}"

-    assert len(files_in_watch_dir) == 2, "Should be just two files in the dir, history.txt and the snapshot"
+    assert len(files_in_watch_dir) == 3, "Should be just three files in the dir, html.br snapshot, history.txt and the extracted text snapshot"
@@ -45,7 +45,6 @@ def test_highlight_ignore(client, live_server):
     )

     res = client.get(url_for("edit_page", uuid=uuid))
-
     # should be a regex now
     assert b'/oh\ yeah\ \d+/' in res.data
@@ -55,3 +54,7 @@ def test_highlight_ignore(client, live_server):
     # And it should register in the preview page
     res = client.get(url_for("preview_page", uuid=uuid))
     assert b'<div class="ignored">oh yeah 456' in res.data
+
+    # Should be in base.html
+    assert b'csrftoken' in res.data
@@ -41,19 +41,26 @@ and it can also be repeated
     from .. import html_tools

     # See that we can find the second <script> one, which is not broken, and matches our filter
     text = html_tools.extract_json_as_string(content, "json:$.offers.price")
     assert text == "23.5"
     text = html_tools.extract_json_as_string(content, "json:$.offers.priceCurrency")
     assert text == '"AUD"'

+    text = html_tools.extract_json_as_string('{"id":5}', "json:$.id")
+    assert text == "5"

     # also check for jq
     if jq_support:
         text = html_tools.extract_json_as_string(content, "jq:.offers.price")
         assert text == "23.5"
         text = html_tools.extract_json_as_string(content, "jq:.offers.priceCurrency")
         assert text == '"AUD"'

         text = html_tools.extract_json_as_string('{"id":5}', "jq:.id")
         assert text == "5"

-        text = html_tools.extract_json_as_string('{"id":5}', "json:$.id")
-        assert text == "5"
+        text = html_tools.extract_json_as_string(content, "jqraw:.offers.priceCurrency")
+        assert text == "AUD"
+
+        text = html_tools.extract_json_as_string('{"id":5}', "jqraw:.id")
+        assert text == "5"

     # When nothing at all is found, it should throw JSONNOTFound
     # Which is caught and shown to the user in the watch-overview table
@@ -64,6 +71,9 @@ and it can also be repeated
     with pytest.raises(html_tools.JSONNotFound) as e_info:
         html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "jq:.id")

+    with pytest.raises(html_tools.JSONNotFound) as e_info:
+        html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "jqraw:.id")

 def test_unittest_inline_extract_body():
     content = """
@@ -291,6 +301,10 @@ def test_check_jq_filter(client, live_server):
     if jq_support:
         check_json_filter('jq:.boss.name', client, live_server)

+def test_check_jqraw_filter(client, live_server):
+    if jq_support:
+        check_json_filter('jqraw:.boss.name', client, live_server)

 def check_json_filter_bool_val(json_filter, client, live_server):
     set_original_response()
@@ -345,6 +359,10 @@ def test_check_jq_filter_bool_val(client, live_server):
     if jq_support:
         check_json_filter_bool_val("jq:.available", client, live_server)

+def test_check_jqraw_filter_bool_val(client, live_server):
+    if jq_support:
+        check_json_filter_bool_val("jq:.available", client, live_server)

 # Re #265 - Extended JSON selector test
 # Stuff to consider here
 # - Selector should be allowed to return empty when it doesnt match (people might wait for some condition)
@@ -491,4 +509,8 @@ def test_check_jsonpath_ext_filter(client, live_server):

 def test_check_jq_ext_filter(client, live_server):
     if jq_support:
-        check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server)
+        check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server)
+
+def test_check_jqraw_ext_filter(client, live_server):
+    if jq_support:
+        check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server)
@@ -69,6 +69,7 @@ def test_rss_and_token(client, live_server):

     wait_for_all_checks(client)
     set_modified_response()
+    time.sleep(1)
     client.get(url_for("form_watch_checknow"), follow_redirects=True)
     wait_for_all_checks(client)
@@ -87,7 +88,7 @@ def test_rss_and_token(client, live_server):
     assert b"Access denied, bad token" not in res.data
     assert b"Random content" in res.data

-    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
+    client.get(url_for("form_delete", uuid="all"), follow_redirects=True)

 def test_basic_cdata_rss_markup(client, live_server):
     #live_server_setup(live_server)
@@ -1,11 +1,12 @@
-import os
-import threading
-import queue
-import time
 from . import content_fetchers
-from changedetectionio import html_tools
-from .processors.text_json_diff import FilterNotFoundInResponse
 from .processors.restock_diff import UnableToExtractRestockData
+from .processors.text_json_diff import FilterNotFoundInResponse
+from changedetectionio import html_tools
+from copy import deepcopy
+import os
+import queue
+import threading
+import time

 # A single update worker
 #
@@ -245,14 +246,19 @@ class update_worker(threading.Thread):
                     contents = b''
                     process_changedetection_results = True
                     update_obj = {}
-                    logger.info(f"Processing watch UUID {uuid} "
-                                f"Priority {queued_item_data.priority} "
-                                f"URL {self.datastore.data['watching'][uuid]['url']}")

+                    # Clear last errors (move to preflight func?)
+                    self.datastore.data['watching'][uuid]['browser_steps_last_error_step'] = None
+
+                    # DeepCopy so we can be sure we don't accidently change anything by reference
+                    watch = deepcopy(self.datastore.data['watching'].get(uuid))
+
+                    logger.info(f"Processing watch UUID {uuid} Priority {queued_item_data.priority} URL {watch['url']}")
                     now = time.time()

                     try:
                         # Processor is what we are using for detecting the "Change"
-                        processor = self.datastore.data['watching'][uuid].get('processor', 'text_json_diff')
+                        processor = watch.get('processor', 'text_json_diff')
                         # if system...

                         # Abort processing when the content was the same as the last fetch
@@ -272,14 +278,12 @@ class update_worker(threading.Thread):
                             watch_uuid=uuid
                         )

-                        # Clear last errors (move to preflight func?)
-                        self.datastore.data['watching'][uuid]['browser_steps_last_error_step'] = None
-
                         update_handler.call_browser()

-                        changed_detected, update_obj, contents = update_handler.run_changedetection(uuid,
-                                                                                                    skip_when_checksum_same=skip_when_same_checksum,
-                                                                                                    )
+                        changed_detected, update_obj, contents = update_handler.run_changedetection(
+                            watch=watch,
+                            skip_when_checksum_same=skip_when_same_checksum,
+                        )

                     # Re #342
                     # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
@@ -309,7 +313,8 @@ class update_worker(threading.Thread):
|
||||
})
|
||||
|
||||
if e.screenshot:
|
||||
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot)
|
||||
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
|
||||
|
||||
process_changedetection_results = False
|
||||
|
||||
except content_fetchers.exceptions.Non200ErrorCodeReceived as e:
|
||||
@@ -325,11 +330,11 @@ class update_worker(threading.Thread):
err_text = "Error - Request returned a HTTP error code {}".format(str(e.status_code))

if e.screenshot:
    self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
    watch.save_screenshot(screenshot=e.screenshot, as_error=True)
if e.xpath_data:
    self.datastore.save_xpath_data(watch_uuid=uuid, data=e.xpath_data, as_error=True)
    watch.save_xpath_data(data=e.xpath_data, as_error=True)
if e.page_text:
    self.datastore.save_error_text(watch_uuid=uuid, contents=e.page_text)
    watch.save_error_text(contents=e.page_text)

self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
process_changedetection_results = False
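These hunks move screenshot, xpath and error-text persistence off the datastore and onto the watch object itself. A rough sketch of what such a watch-level save method could look like (the class, storage path and filenames below are assumed for illustration, not the project's actual implementation):

    import os

    class Watch(dict):
        # Hypothetical stand-in for the real model; the actual class and paths differ
        data_dir = '/tmp/watch-data'

        def save_screenshot(self, screenshot: bytes, as_error: bool = False):
            # Error screenshots get their own filename so the last good one is preserved
            name = 'last-error-screenshot.png' if as_error else 'last-screenshot.png'
            os.makedirs(self.data_dir, exist_ok=True)
            with open(os.path.join(self.data_dir, name), 'wb') as f:
                f.write(screenshot)

    w = Watch(url='https://example.com')
    w.save_screenshot(b'\x89PNG...', as_error=True)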
@@ -340,17 +345,19 @@ class update_worker(threading.Thread):

err_text = "Warning, no filters were found, no change detection ran - Did the page change layout? update your Visual Filter if necessary."
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
if e.screenshot:
    watch.save_screenshot(screenshot=e.screenshot, as_error=True)

# Only when enabled, send the notification
if self.datastore.data['watching'][uuid].get('filter_failure_notification_send', False):
    c = self.datastore.data['watching'][uuid].get('consecutive_filter_failures', 5)
if watch.get('filter_failure_notification_send', False):
    c = watch.get('consecutive_filter_failures', 5)
    c += 1
    # Send notification if we reached the threshold?
    threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
                                                                   0)
    logger.error(f"Filter for {uuid} not found, consecutive_filter_failures: {c}")
    logger.warning(f"Filter for {uuid} not found, consecutive_filter_failures: {c}")
    if threshold > 0 and c >= threshold:
        if not self.datastore.data['watching'][uuid].get('notification_muted'):
        if not watch.get('notification_muted'):
            self.send_filter_failure_notification(uuid)
        c = 0
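The notification logic above only fires once a configurable number of consecutive filter failures is reached, and never for muted watches. A small sketch of that guard, under the same semantics (function name and values are illustrative):

    def should_send_filter_failure_notification(consecutive_failures, threshold, muted):
        # threshold == 0 disables the notification, matching the `threshold > 0` guard above
        return threshold > 0 and consecutive_failures >= threshold and not muted

    assert should_send_filter_failure_notification(6, threshold=6, muted=False)
    assert not should_send_filter_failure_notification(6, threshold=0, muted=False)  # disabled
    assert not should_send_filter_failure_notification(6, threshold=6, muted=True)   # watch muted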
@@ -362,7 +369,6 @@ class update_worker(threading.Thread):
# Yes fine, so nothing todo, don't continue to process.
process_changedetection_results = False
changed_detected = False
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': False})
except content_fetchers.exceptions.BrowserConnectError as e:
    self.datastore.update_watch(uuid=uuid,
                                update_obj={'last_error': e.msg
@@ -401,15 +407,15 @@ class update_worker(threading.Thread):
    }
)

if self.datastore.data['watching'][uuid].get('filter_failure_notification_send', False):
    c = self.datastore.data['watching'][uuid].get('consecutive_filter_failures', 5)
if watch.get('filter_failure_notification_send', False):
    c = watch.get('consecutive_filter_failures', 5)
    c += 1
    # Send notification if we reached the threshold?
    threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
                                                                   0)
    logger.error(f"Step for {uuid} not found, consecutive_filter_failures: {c}")
    if threshold > 0 and c >= threshold:
        if not self.datastore.data['watching'][uuid].get('notification_muted'):
        if not watch.get('notification_muted'):
            self.send_step_failure_notification(watch_uuid=uuid, step_n=e.step_n)
        c = 0
@@ -431,7 +437,7 @@ class update_worker(threading.Thread):
except content_fetchers.exceptions.JSActionExceptions as e:
    err_text = "Error running JS Actions - Page request - "+e.message
    if e.screenshot:
        self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
        watch.save_screenshot(screenshot=e.screenshot, as_error=True)
    self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                       'last_check_status': e.status_code})
    process_changedetection_results = False
@@ -441,7 +447,7 @@ class update_worker(threading.Thread):
err_text = "{} - {}".format(err_text, e.message)

if e.screenshot:
    self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
    watch.save_screenshot(screenshot=e.screenshot, as_error=True)

self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                   'last_check_status': e.status_code,
@@ -465,8 +471,6 @@ class update_worker(threading.Thread):
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Exception: " + str(e)})
# Other serious error
process_changedetection_results = False
# import traceback
# print(traceback.format_exc())

else:
    # Crash protection, the watch entry could have been removed by this point (during a slow chrome fetch etc)
@@ -474,7 +478,7 @@ class update_worker(threading.Thread):
    continue

# Mark that we never had any failures
if not self.datastore.data['watching'][uuid].get('ignore_status_codes'):
if not watch.get('ignore_status_codes'):
    update_obj['consecutive_filter_failures'] = 0

# Everything ran OK, clean off any previous error
@@ -482,25 +486,48 @@ class update_worker(threading.Thread):

self.cleanup_error_artifacts(uuid)

if not self.datastore.data['watching'].get(uuid):
    continue
#
# Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc
if process_changedetection_results:
    # Always save the screenshot if it's available

    if update_handler.screenshot:
        watch.save_screenshot(screenshot=update_handler.screenshot)

    if update_handler.xpath_data:
        watch.save_xpath_data(data=update_handler.xpath_data)

    try:
        watch = self.datastore.data['watching'].get(uuid)
        self.datastore.update_watch(uuid=uuid, update_obj=update_obj)

        # Also save the snapshot on the first time checked
        if changed_detected or not watch['last_checked']:
        if changed_detected or not watch.get('last_checked'):
            timestamp = round(time.time())

            # Small hack so that we sleep just enough to allow 1 second between history snapshots
            # this is because history.txt indexes/keys snapshots by epoch seconds and we dont want dupe keys

            if watch.newest_history_key and int(timestamp) == int(watch.newest_history_key):
                logger.warning(
                    f"Timestamp {timestamp} already exists, waiting 1 seconds so we have a unique key in history.txt")
                timestamp = str(int(timestamp) + 1)
                time.sleep(1)

            watch.save_history_text(contents=contents,
                                    timestamp=str(round(time.time())),
                                    timestamp=timestamp,
                                    snapshot_id=update_obj.get('previous_md5', 'none'))

            if update_handler.fetcher.content:
                watch.save_last_fetched_html(contents=update_handler.fetcher.content, timestamp=timestamp)

            # A change was detected
            if changed_detected:
                # Notifications should only trigger on the second time (first time, we gather the initial snapshot)
                if watch.history_n >= 2:
                    logger.info(f"Change detected in UUID {uuid} - {watch['url']}")
                    if not self.datastore.data['watching'][uuid].get('notification_muted'):
                    if not watch.get('notification_muted'):
                        self.send_content_changed_notification(watch_uuid=uuid)
                else:
                    logger.info(f"Change triggered in UUID {uuid} due to first history saving (no notifications sent) - {watch['url']}")
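The timestamp handling above exists because history.txt keys snapshots by epoch seconds, so two snapshots inside the same second would collide. A standalone sketch of the collision bump (function name is illustrative):

    import time

    def unique_history_timestamp(newest_history_key, now=None):
        # Same second as the newest existing snapshot: bump by one so keys stay unique
        timestamp = round(now if now is not None else time.time())
        if newest_history_key and int(timestamp) == int(newest_history_key):
            timestamp = int(timestamp) + 1
        return str(timestamp)

    assert unique_history_timestamp(1700000000, now=1700000000) == '1700000001'
    assert unique_history_timestamp(1700000000, now=1700000007) == '1700000007'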
@@ -511,29 +538,23 @@ class update_worker(threading.Thread):
logger.critical(str(e))
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})

if self.datastore.data['watching'].get(uuid):
    # Always record that we atleast tried
    count = self.datastore.data['watching'][uuid].get('check_count', 0) + 1

    # Record the 'server' header reply, can be used for actions in the future like cloudflare/akamai workarounds
    try:
        server_header = update_handler.fetcher.headers.get('server', '').strip().lower()[:255]
        self.datastore.update_watch(uuid=uuid,
                                    update_obj={'remote_server_reply': server_header}
                                    )
    except Exception as e:
        pass
    # Always record that we atleast tried
    count = watch.get('check_count', 0) + 1

    self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
                                                       'last_checked': round(time.time()),
                                                       'check_count': count
                                                       })
    # Record the 'server' header reply, can be used for actions in the future like cloudflare/akamai workarounds
    try:
        server_header = update_handler.fetcher.headers.get('server', '').strip().lower()[:255]
        self.datastore.update_watch(uuid=uuid,
                                    update_obj={'remote_server_reply': server_header}
                                    )
    except Exception as e:
        pass

    # Always save the screenshot if it's available
    if update_handler.screenshot:
        self.datastore.save_screenshot(watch_uuid=uuid, screenshot=update_handler.screenshot)
    if update_handler.xpath_data:
        self.datastore.save_xpath_data(watch_uuid=uuid, data=update_handler.xpath_data)
    self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
                                                       'last_checked': round(time.time()),
                                                       'check_count': count
                                                       })


self.current_uuid = None  # Done
docker-compose.yml
@@ -68,9 +68,10 @@ services:

    # If WEBDRIVER or PLAYWRIGHT are enabled, changedetection container depends on that
    # and must wait before starting (substitute "browser-chrome" with "playwright-chrome" if last one is used)
    # depends_on:
    #     browser-chrome:
    #         condition: service_started
    # depends_on:
    #     playwright-chrome:
    #         condition: service_started


# Used for fetching pages via Playwright+Chrome where you need Javascript support.
# RECOMMENDED FOR FETCHING PAGES WITH CHROME
requirements.txt
@@ -1,7 +1,7 @@
# Used by Pyppeteer
pyee

eventlet==0.35.2 # related to dnspython fixes
eventlet>=0.36.1 # fixes SSL error on Python 3.12
feedgen~=0.9
flask-compress
# 0.6.3 included compatibility fix for werkzeug 3.x (2.x had deprecation of url handlers)
@@ -23,13 +23,13 @@ validators~=0.21
brotli~=1.0
requests[socks]

urllib3==1.26.18
urllib3==1.26.19
chardet>2.3.0

wtforms~=3.0
jsonpath-ng~=1.5.3

dnspython==2.6.1
dnspython==2.6.1 # related to eventlet fixes

# jq not available on Windows so must be installed manually
@@ -41,10 +41,8 @@ apprise~=1.8.0
# use v1.x due to https://github.com/eclipse/paho.mqtt.python/issues/814
paho-mqtt>=1.6.1,<2.0.0

# This mainly affects some ARM builds, which unlike the other builds ignores "ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1"
# so without this pinning, the newer versions on ARM will forcefully try to build rust, which results in "rust compiler not found"
# (introduced once apprise became a dep)
cryptography~=3.4
# Requires extra wheel for rPi
cryptography~=42.0.8

# Used for CSS filtering
beautifulsoup4
@@ -85,4 +83,4 @@ jsonschema==4.17.3

loguru
# Needed for > 3.10, https://github.com/microsoft/playwright-python/issues/2096
greenlet >= 3.0.3
greenlet >= 3.0.3