Compare commits

..

31 Commits

Author SHA1 Message Date
dgtlmoon
81004ae09a Adding simple memory usage test 2024-07-11 13:39:06 +02:00
dgtlmoon
ed38012c6e Code - Fixing deprecation warning (#2477) 2024-07-09 17:38:17 +02:00
dgtlmoon
f07ff9b55e UI - Visual Selector should still update when elements were not found (#2476) 2024-07-09 15:35:19 +02:00
Nectariferous
1c46914992 Code - Update/modernise diff.py (#2471) 2024-07-09 15:08:13 +02:00
dgtlmoon
e9c4037178 UI - Visual Selector - Multiple selections (refactor) (#2475) 2024-07-09 15:07:23 +02:00
dgtlmoon
1af342ef64 UI - Visual Selector now supports Shift+Click for multiple selections! 2024-07-05 20:43:26 +02:00
dgtlmoon
e09ee7da97 UI - Visual Selector - Show/visualise all/any matching filter elements from all filters in "CSS/JSONPath/JQ/XPath Filters" include filters (#2440) 2024-07-05 15:20:39 +02:00
dgtlmoon
09bc24ff34 UI - Visual Selector graphics should be centred 2024-07-05 14:33:36 +02:00
dgtlmoon
a1d04bb37f Snapshot count from history was not updated in watch after using [clear history] (#2459) 2024-07-05 11:09:31 +02:00
dgtlmoon
01f910f840 Fixing 'tags'' field from old installs (0.43.0+) could have wrong data-type causing crash 2024-07-04 15:23:06 +02:00
dgtlmoon
bed16009bb 0.45.25 2024-07-03 19:27:23 +02:00
dgtlmoon
faeed78ffb UI - Fixing preview/diff "ignore text" highlight button (refactor, didnt work in "preview" mode) (#2455) 2024-07-03 19:26:33 +02:00
dgtlmoon
5d9081ccb2 Restock detection - Updating detection texts 2024-07-03 18:45:36 +02:00
dgtlmoon
2cf1829073 UI - Mobile - Hiding empty columns 2024-07-03 17:13:31 +02:00
dgtlmoon
526551a205 UI - Mobile - Watch overview table - Sort/order buttons were not being shown correctly 2024-06-30 18:05:13 +02:00
dgtlmoon
ba139e7f3f Update docker-compose.yml - fix indentation re #2447 2024-06-28 23:08:37 +02:00
Max Michels
13e343f9da Restock detection - Added extra out-of-stock phrases for DE (#2442) 2024-06-26 11:03:00 +02:00
dgtlmoon
13be4623db Restock detection - updating texts 2024-06-25 13:23:43 +02:00
dgtlmoon
3b19e3d2bf UI - Fixing double punctuation in 'unpaused' message #2435 2024-06-24 09:15:48 +02:00
dependabot[bot]
ce42f8ea26 Build - Bump docker/build-push-action from 5 to 6 in the all group (#2436) 2024-06-24 08:50:02 +02:00
dgtlmoon
343e359b39 Now saving last two HTML snapshots for future reference, refactor, dont write screenshots and xpath to disk when no change detected (saves disk IO) (#2431) 2024-06-23 09:19:32 +02:00
Hritik Vijay
ffd160ce0e Filters - Implement jqraw: filter (use this to output nicer JSON format when selecting/filtering by JSON) (#2430) 2024-06-21 13:31:03 +02:00
dgtlmoon
d31fc860cc Build - fixing build warnings 2024-06-20 15:07:17 +02:00
dgtlmoon
90b357f457 Upgrade to Python 3.11 from 3.10, add faster prebuilt "wheels" for rPi devices, upgrade cryptography security library 2024-06-20 14:42:17 +02:00
dgtlmoon
cc147be76e Prefer pythons built in "importlib" over pkg_resources+setuptools (#2424) 2024-06-18 09:08:48 +02:00
dependabot[bot]
8ae5ed76ce Security/dependabot - Bump urllib3 from 1.26.18 to 1.26.19 (#2423) 2024-06-18 08:23:12 +02:00
dgtlmoon
a9ed113369 0.45.24 2024-06-17 13:27:11 +02:00
dgtlmoon
eacf920b9a Update eventlet ( Fixes SSL error on Python 3.12 ) (#2419) 2024-06-17 12:05:20 +02:00
dgtlmoon
c9af9b6374 Filter failure/not found notification threshold - Counter should be reset when editing a watch, clear watch errors on 'save' (#2413) 2024-06-17 11:42:41 +02:00
dependabot[bot]
5e65fb606b Bump dnspython from 2.3.0 to 2.6.1 (#2306) 2024-06-17 10:23:50 +02:00
dgtlmoon
434a1b242e Improve testing for Python 3.10, 3.11 and 3.12 2024-06-17 09:46:54 +02:00
92 changed files with 920 additions and 718 deletions

View File

@@ -88,7 +88,7 @@ jobs:
- name: Build and push :dev - name: Build and push :dev
id: docker_build id: docker_build
if: ${{ github.ref }} == "refs/heads/master" if: ${{ github.ref }} == "refs/heads/master"
uses: docker/build-push-action@v5 uses: docker/build-push-action@v6
with: with:
context: ./ context: ./
file: ./Dockerfile file: ./Dockerfile
@@ -106,7 +106,7 @@ jobs:
- name: Build and push :tag - name: Build and push :tag
id: docker_build_tag_release id: docker_build_tag_release
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.') if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
uses: docker/build-push-action@v5 uses: docker/build-push-action@v6
with: with:
context: ./ context: ./
file: ./Dockerfile file: ./Dockerfile

View File

@@ -51,7 +51,7 @@ jobs:
# Check we can still build under alpine/musl # Check we can still build under alpine/musl
- name: Test that the docker containers can build (musl via alpine check) - name: Test that the docker containers can build (musl via alpine check)
id: docker_build_musl id: docker_build_musl
uses: docker/build-push-action@v5 uses: docker/build-push-action@v6
with: with:
context: ./ context: ./
file: ./.github/test/Dockerfile-alpine file: ./.github/test/Dockerfile-alpine
@@ -59,7 +59,7 @@ jobs:
- name: Test that the docker containers can build - name: Test that the docker containers can build
id: docker_build id: docker_build
uses: docker/build-push-action@v5 uses: docker/build-push-action@v6
# https://github.com/docker/build-push-action#customizing # https://github.com/docker/build-push-action#customizing
with: with:
context: ./ context: ./

View File

@@ -3,9 +3,9 @@
# @NOTE! I would love to move to 3.11 but it breaks the async handler in changedetectionio/content_fetchers/puppeteer.py # @NOTE! I would love to move to 3.11 but it breaks the async handler in changedetectionio/content_fetchers/puppeteer.py
# If you know how to fix it, please do! and test it for both 3.10 and 3.11 # If you know how to fix it, please do! and test it for both 3.10 and 3.11
ARG PYTHON_VERSION=3.10 ARG PYTHON_VERSION=3.11
FROM python:${PYTHON_VERSION}-slim-bookworm as builder FROM python:${PYTHON_VERSION}-slim-bookworm AS builder
# See `cryptography` pin comment in requirements.txt # See `cryptography` pin comment in requirements.txt
ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1 ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1
@@ -26,7 +26,8 @@ WORKDIR /install
COPY requirements.txt /requirements.txt COPY requirements.txt /requirements.txt
RUN pip install --target=/dependencies -r /requirements.txt # --extra-index-url https://www.piwheels.org/simple is for cryptography module to be prebuilt (or rustc etc needs to be installed)
RUN pip install --extra-index-url https://www.piwheels.org/simple --target=/dependencies -r /requirements.txt
# Playwright is an alternative to Selenium # Playwright is an alternative to Selenium
# Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing # Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing

View File

@@ -2,7 +2,7 @@
# Read more https://github.com/dgtlmoon/changedetection.io/wiki # Read more https://github.com/dgtlmoon/changedetection.io/wiki
__version__ = '0.45.23' __version__ = '0.45.25'
from changedetectionio.strtobool import strtobool from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError from json.decoder import JSONDecodeError

View File

@@ -170,23 +170,33 @@ class WatchSingleHistory(Resource):
curl http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091/history/1677092977 -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" curl http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091/history/1677092977 -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json"
@apiName Get single snapshot content @apiName Get single snapshot content
@apiGroup Watch History @apiGroup Watch History
@apiParam {String} [html] Optional Set to =1 to return the last HTML (only stores last 2 snapshots, use `latest` as timestamp)
@apiSuccess (200) {String} OK @apiSuccess (200) {String} OK
@apiSuccess (404) {String} ERR Not found @apiSuccess (404) {String} ERR Not found
""" """
watch = self.datastore.data['watching'].get(uuid) watch = self.datastore.data['watching'].get(uuid)
if not watch: if not watch:
abort(404, message='No watch exists with the UUID of {}'.format(uuid)) abort(404, message=f"No watch exists with the UUID of {uuid}")
if not len(watch.history): if not len(watch.history):
abort(404, message='Watch found but no history exists for the UUID {}'.format(uuid)) abort(404, message=f"Watch found but no history exists for the UUID {uuid}")
if timestamp == 'latest': if timestamp == 'latest':
timestamp = list(watch.history.keys())[-1] timestamp = list(watch.history.keys())[-1]
content = watch.get_history_snapshot(timestamp) if request.args.get('html'):
content = watch.get_fetched_html(timestamp)
if content:
response = make_response(content, 200)
response.mimetype = "text/html"
else:
response = make_response("No content found", 404)
response.mimetype = "text/plain"
else:
content = watch.get_history_snapshot(timestamp)
response = make_response(content, 200)
response.mimetype = "text/plain"
response = make_response(content, 200)
response.mimetype = "text/plain"
return response return response

View File

@@ -187,8 +187,10 @@ def construct_blueprint(datastore: ChangeDetectionStore):
u = browsersteps_sessions[browsersteps_session_id]['browserstepper'].page.url u = browsersteps_sessions[browsersteps_session_id]['browserstepper'].page.url
if is_last_step and u: if is_last_step and u:
(screenshot, xpath_data) = browsersteps_sessions[browsersteps_session_id]['browserstepper'].request_visualselector_data() (screenshot, xpath_data) = browsersteps_sessions[browsersteps_session_id]['browserstepper'].request_visualselector_data()
datastore.save_screenshot(watch_uuid=uuid, screenshot=screenshot) watch = datastore.data['watching'].get(uuid)
datastore.save_xpath_data(watch_uuid=uuid, data=xpath_data) if watch:
watch.save_screenshot(screenshot=screenshot)
watch.save_xpath_data(data=xpath_data)
# if not this_session.page: # if not this_session.page:
# cleanup_playwright_session() # cleanup_playwright_session()

View File

@@ -255,8 +255,9 @@ class browsersteps_live_ui(steppable_browser_interface):
def get_current_state(self): def get_current_state(self):
"""Return the screenshot and interactive elements mapping, generally always called after action_()""" """Return the screenshot and interactive elements mapping, generally always called after action_()"""
from pkg_resources import resource_string import importlib.resources
xpath_element_js = resource_string(__name__, "../../content_fetchers/res/xpath_element_scraper.js").decode('utf-8') xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
now = time.time() now = time.time()
self.page.wait_for_timeout(1 * 1000) self.page.wait_for_timeout(1 * 1000)
@@ -287,11 +288,9 @@ class browsersteps_live_ui(steppable_browser_interface):
:param current_include_filters: :param current_include_filters:
:return: :return:
""" """
import importlib.resources
self.page.evaluate("var include_filters=''") self.page.evaluate("var include_filters=''")
from pkg_resources import resource_string xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
# The code that scrapes elements and makes a list of elements/size/position to click on in the VisualSelector
xpath_element_js = resource_string(__name__, "../../content_fetchers/res/xpath_element_scraper.js").decode('utf-8')
from changedetectionio.content_fetchers import visualselector_xpath_selectors from changedetectionio.content_fetchers import visualselector_xpath_selectors
xpath_element_js = xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) xpath_element_js = xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors)
xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}") xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")

View File

@@ -63,7 +63,7 @@ xpath://body/div/span[contains(@class, 'example-class')]",
<ul> <ul>
<li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required, <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li> <li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required, <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li>
{% if jq_support %} {% if jq_support %}
<li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>.</li> <li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>. Prefix <code>jqraw:</code> outputs the results as text instead of a JSON list.</li>
{% else %} {% else %}
<li>jq support not installed</li> <li>jq support not installed</li>
{% endif %} {% endif %}

View File

@@ -64,10 +64,9 @@ class Fetcher():
render_extract_delay = 0 render_extract_delay = 0
def __init__(self): def __init__(self):
from pkg_resources import resource_string import importlib.resources
# The code that scrapes elements and makes a list of elements/size/position to click on in the VisualSelector self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
self.xpath_element_js = resource_string(__name__, "res/xpath_element_scraper.js").decode('utf-8') self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text()
self.instock_data_js = resource_string(__name__, "res/stock-not-in-stock.js").decode('utf-8')
@abstractmethod @abstractmethod
def get_error(self): def get_error(self):

View File

@@ -87,11 +87,12 @@ class ScreenshotUnavailable(Exception):
class ReplyWithContentButNoText(Exception): class ReplyWithContentButNoText(Exception):
def __init__(self, status_code, url, screenshot=None, has_filters=False, html_content=''): def __init__(self, status_code, url, screenshot=None, has_filters=False, html_content='', xpath_data=None):
# Set this so we can use it in other parts of the app # Set this so we can use it in other parts of the app
self.status_code = status_code self.status_code = status_code
self.url = url self.url = url
self.screenshot = screenshot self.screenshot = screenshot
self.has_filters = has_filters self.has_filters = has_filters
self.html_content = html_content self.html_content = html_content
self.xpath_data = xpath_data
return return

View File

@@ -0,0 +1 @@
# resources for browser injection/scraping

View File

@@ -30,14 +30,21 @@ function isItemInStock() {
'dieser artikel ist bald wieder verfügbar', 'dieser artikel ist bald wieder verfügbar',
'dostępne wkrótce', 'dostępne wkrótce',
'en rupture de stock', 'en rupture de stock',
'ist derzeit nicht auf lager', 'isn\'t in stock right now',
'isnt in stock right now',
'isnt in stock right now',
'item is no longer available', 'item is no longer available',
'let me know when it\'s available', 'let me know when it\'s available',
'mail me when available',
'message if back in stock', 'message if back in stock',
'nachricht bei', 'nachricht bei',
'nicht auf lager', 'nicht auf lager',
'nicht lagernd',
'nicht lieferbar', 'nicht lieferbar',
'nicht verfügbar',
'nicht vorrätig',
'nicht zur verfügung', 'nicht zur verfügung',
'nie znaleziono produktów',
'niet beschikbaar', 'niet beschikbaar',
'niet leverbaar', 'niet leverbaar',
'niet op voorraad', 'niet op voorraad',
@@ -48,6 +55,7 @@ function isItemInStock() {
'not currently available', 'not currently available',
'not in stock', 'not in stock',
'notify me when available', 'notify me when available',
'notify me',
'notify when available', 'notify when available',
'não estamos a aceitar encomendas', 'não estamos a aceitar encomendas',
'out of stock', 'out of stock',
@@ -62,12 +70,16 @@ function isItemInStock() {
'this item is currently unavailable', 'this item is currently unavailable',
'tickets unavailable', 'tickets unavailable',
'tijdelijk uitverkocht', 'tijdelijk uitverkocht',
'unavailable nearby',
'unavailable tickets', 'unavailable tickets',
'vergriffen',
'vorbestellen',
'vorbestellung ist bald möglich', 'vorbestellung ist bald möglich',
'we couldn\'t find any products that match', 'we couldn\'t find any products that match',
'we do not currently have an estimate of when this product will be back in stock.', 'we do not currently have an estimate of when this product will be back in stock.',
'we don\'t know when or if this item will be back in stock.', 'we don\'t know when or if this item will be back in stock.',
'we were not able to find a match', 'we were not able to find a match',
'when this arrives in stock',
'zur zeit nicht an lager', 'zur zeit nicht an lager',
'品切れ', '品切れ',
'已售', '已售',

View File

@@ -182,6 +182,7 @@ visibleElementsArray.forEach(function (element) {
// Inject the current one set in the include_filters, which may be a CSS rule // Inject the current one set in the include_filters, which may be a CSS rule
// used for displaying the current one in VisualSelector, where its not one we generated. // used for displaying the current one in VisualSelector, where its not one we generated.
if (include_filters.length) { if (include_filters.length) {
let results;
// Foreach filter, go and find it on the page and add it to the results so we can visualise it again // Foreach filter, go and find it on the page and add it to the results so we can visualise it again
for (const f of include_filters) { for (const f of include_filters) {
bbox = false; bbox = false;
@@ -197,10 +198,15 @@ if (include_filters.length) {
if (f.startsWith('/') || f.startsWith('xpath')) { if (f.startsWith('/') || f.startsWith('xpath')) {
var qry_f = f.replace(/xpath(:|\d:)/, '') var qry_f = f.replace(/xpath(:|\d:)/, '')
console.log("[xpath] Scanning for included filter " + qry_f) console.log("[xpath] Scanning for included filter " + qry_f)
q = document.evaluate(qry_f, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue; let xpathResult = document.evaluate(qry_f, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
results = [];
for (let i = 0; i < xpathResult.snapshotLength; i++) {
results.push(xpathResult.snapshotItem(i));
}
} else { } else {
console.log("[css] Scanning for included filter " + f) console.log("[css] Scanning for included filter " + f)
q = document.querySelector(f); console.log("[css] Scanning for included filter " + f);
results = document.querySelectorAll(f);
} }
} catch (e) { } catch (e) {
// Maybe catch DOMException and alert? // Maybe catch DOMException and alert?
@@ -208,44 +214,45 @@ if (include_filters.length) {
console.log(e); console.log(e);
} }
if (q) { if (results.length) {
// Try to resolve //something/text() back to its /something so we can atleast get the bounding box
try {
if (typeof q.nodeName == 'string' && q.nodeName === '#text') {
q = q.parentElement
}
} catch (e) {
console.log(e)
console.log("xpath_element_scraper: #text resolver")
}
// #1231 - IN the case XPath attribute filter is applied, we will have to traverse up and find the element. // Iterate over the results
if (typeof q.getBoundingClientRect == 'function') { results.forEach(node => {
bbox = q.getBoundingClientRect(); // Try to resolve //something/text() back to its /something so we can atleast get the bounding box
console.log("xpath_element_scraper: Got filter element, scroll from top was " + scroll_y)
} else {
try { try {
// Try and see we can find its ownerElement if (typeof node.nodeName == 'string' && node.nodeName === '#text') {
bbox = q.ownerElement.getBoundingClientRect(); node = node.parentElement
console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y) }
} catch (e) { } catch (e) {
console.log(e) console.log(e)
console.log("xpath_element_scraper: error looking up q.ownerElement") console.log("xpath_element_scraper: #text resolver")
} }
}
}
if (!q) { // #1231 - IN the case XPath attribute filter is applied, we will have to traverse up and find the element.
console.log("xpath_element_scraper: filter element " + f + " was not found"); if (typeof node.getBoundingClientRect == 'function') {
} bbox = node.getBoundingClientRect();
console.log("xpath_element_scraper: Got filter element, scroll from top was " + scroll_y)
} else {
try {
// Try and see we can find its ownerElement
bbox = node.ownerElement.getBoundingClientRect();
console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y)
} catch (e) {
console.log(e)
console.log("xpath_element_scraper: error looking up q.ownerElement")
}
}
if (bbox && bbox['width'] > 0 && bbox['height'] > 0) { if (bbox && bbox['width'] > 0 && bbox['height'] > 0) {
size_pos.push({ size_pos.push({
xpath: f, xpath: f,
width: parseInt(bbox['width']), width: parseInt(bbox['width']),
height: parseInt(bbox['height']), height: parseInt(bbox['height']),
left: parseInt(bbox['left']), left: parseInt(bbox['left']),
top: parseInt(bbox['top']) + scroll_y top: parseInt(bbox['top']) + scroll_y,
highlight_as_custom_filter: true
});
}
}); });
} }
} }

View File

@@ -1,62 +1,97 @@
# used for the notifications, the front-end is using a JS library
import difflib import difflib
from typing import List, Iterator, Union
def same_slicer(lst: List[str], start: int, end: int) -> List[str]:
"""Return a slice of the list, or a single element if start == end."""
return lst[start:end] if start != end else [lst[start]]
def same_slicer(l, a, b): def customSequenceMatcher(
if a == b: before: List[str],
return [l[a]] after: List[str],
else: include_equal: bool = False,
return l[a:b] include_removed: bool = True,
include_added: bool = True,
# like .compare but a little different output include_replaced: bool = True,
def customSequenceMatcher(before, after, include_equal=False, include_removed=True, include_added=True, include_replaced=True, include_change_type_prefix=True): include_change_type_prefix: bool = True
cruncher = difflib.SequenceMatcher(isjunk=lambda x: x in " \\t", a=before, b=after) ) -> Iterator[List[str]]:
"""
# @todo Line-by-line mode instead of buncghed, including `after` that is not in `before` (maybe unset?) Compare two sequences and yield differences based on specified parameters.
Args:
before (List[str]): Original sequence
after (List[str]): Modified sequence
include_equal (bool): Include unchanged parts
include_removed (bool): Include removed parts
include_added (bool): Include added parts
include_replaced (bool): Include replaced parts
include_change_type_prefix (bool): Add prefixes to indicate change types
Yields:
List[str]: Differences between sequences
"""
cruncher = difflib.SequenceMatcher(isjunk=lambda x: x in " \t", a=before, b=after)
for tag, alo, ahi, blo, bhi in cruncher.get_opcodes(): for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
if include_equal and tag == 'equal': if include_equal and tag == 'equal':
g = before[alo:ahi] yield before[alo:ahi]
yield g
elif include_removed and tag == 'delete': elif include_removed and tag == 'delete':
row_prefix = "(removed) " if include_change_type_prefix else '' prefix = "(removed) " if include_change_type_prefix else ''
g = [ row_prefix + i for i in same_slicer(before, alo, ahi)] yield [f"{prefix}{line}" for line in same_slicer(before, alo, ahi)]
yield g
elif include_replaced and tag == 'replace': elif include_replaced and tag == 'replace':
row_prefix = "(changed) " if include_change_type_prefix else '' prefix_changed = "(changed) " if include_change_type_prefix else ''
g = [row_prefix + i for i in same_slicer(before, alo, ahi)] prefix_into = "(into) " if include_change_type_prefix else ''
row_prefix = "(into) " if include_change_type_prefix else '' yield [f"{prefix_changed}{line}" for line in same_slicer(before, alo, ahi)] + \
g += [row_prefix + i for i in same_slicer(after, blo, bhi)] [f"{prefix_into}{line}" for line in same_slicer(after, blo, bhi)]
yield g
elif include_added and tag == 'insert': elif include_added and tag == 'insert':
row_prefix = "(added) " if include_change_type_prefix else '' prefix = "(added) " if include_change_type_prefix else ''
g = [row_prefix + i for i in same_slicer(after, blo, bhi)] yield [f"{prefix}{line}" for line in same_slicer(after, blo, bhi)]
yield g
# only_differences - only return info about the differences, no context def render_diff(
# line_feed_sep could be "<br>" or "<li>" or "\n" etc previous_version_file_contents: str,
def render_diff(previous_version_file_contents, newest_version_file_contents, include_equal=False, include_removed=True, include_added=True, include_replaced=True, line_feed_sep="\n", include_change_type_prefix=True, patch_format=False): newest_version_file_contents: str,
include_equal: bool = False,
newest_version_file_contents = [line.rstrip() for line in newest_version_file_contents.splitlines()] include_removed: bool = True,
include_added: bool = True,
if previous_version_file_contents: include_replaced: bool = True,
previous_version_file_contents = [line.rstrip() for line in previous_version_file_contents.splitlines()] line_feed_sep: str = "\n",
else: include_change_type_prefix: bool = True,
previous_version_file_contents = "" patch_format: bool = False
) -> str:
"""
Render the difference between two file contents.
Args:
previous_version_file_contents (str): Original file contents
newest_version_file_contents (str): Modified file contents
include_equal (bool): Include unchanged parts
include_removed (bool): Include removed parts
include_added (bool): Include added parts
include_replaced (bool): Include replaced parts
line_feed_sep (str): Separator for lines in output
include_change_type_prefix (bool): Add prefixes to indicate change types
patch_format (bool): Use patch format for output
Returns:
str: Rendered difference
"""
newest_lines = [line.rstrip() for line in newest_version_file_contents.splitlines()]
previous_lines = [line.rstrip() for line in previous_version_file_contents.splitlines()] if previous_version_file_contents else []
if patch_format: if patch_format:
patch = difflib.unified_diff(previous_version_file_contents, newest_version_file_contents) patch = difflib.unified_diff(previous_lines, newest_lines)
return line_feed_sep.join(patch) return line_feed_sep.join(patch)
rendered_diff = customSequenceMatcher(before=previous_version_file_contents, rendered_diff = customSequenceMatcher(
after=newest_version_file_contents, before=previous_lines,
include_equal=include_equal, after=newest_lines,
include_removed=include_removed, include_equal=include_equal,
include_added=include_added, include_removed=include_removed,
include_replaced=include_replaced, include_added=include_added,
include_change_type_prefix=include_change_type_prefix) include_replaced=include_replaced,
include_change_type_prefix=include_change_type_prefix
)
# Recursively join lists def flatten(lst: List[Union[str, List[str]]]) -> str:
f = lambda L: line_feed_sep.join([f(x) if type(x) is list else x for x in L]) return line_feed_sep.join(flatten(x) if isinstance(x, list) else x for x in lst)
p= f(rendered_diff)
return p return flatten(rendered_diff)

View File

@@ -679,7 +679,10 @@ def changedetection_app(config=None, datastore_o=None):
if request.method == 'POST' and form.validate(): if request.method == 'POST' and form.validate():
extra_update_obj = {} extra_update_obj = {
'consecutive_filter_failures': 0,
'last_error' : False
}
if request.args.get('unpause_on_save'): if request.args.get('unpause_on_save'):
extra_update_obj['paused'] = False extra_update_obj['paused'] = False
@@ -718,7 +721,7 @@ def changedetection_app(config=None, datastore_o=None):
datastore.data['watching'][uuid].update(extra_update_obj) datastore.data['watching'][uuid].update(extra_update_obj)
if request.args.get('unpause_on_save'): if request.args.get('unpause_on_save'):
flash("Updated watch - unpaused!.") flash("Updated watch - unpaused!")
else: else:
flash("Updated watch.") flash("Updated watch.")

View File

@@ -3,8 +3,6 @@ from bs4 import BeautifulSoup
from inscriptis import get_text from inscriptis import get_text
from jsonpath_ng.ext import parse from jsonpath_ng.ext import parse
from typing import List from typing import List
from inscriptis.css_profiles import CSS_PROFILES, HtmlElement
from inscriptis.html_properties import Display
from inscriptis.model.config import ParserConfig from inscriptis.model.config import ParserConfig
from xml.sax.saxutils import escape as xml_escape from xml.sax.saxutils import escape as xml_escape
import json import json
@@ -196,12 +194,12 @@ def extract_element(find='title', html_content=''):
# #
def _parse_json(json_data, json_filter): def _parse_json(json_data, json_filter):
if 'json:' in json_filter: if json_filter.startswith("json:"):
jsonpath_expression = parse(json_filter.replace('json:', '')) jsonpath_expression = parse(json_filter.replace('json:', ''))
match = jsonpath_expression.find(json_data) match = jsonpath_expression.find(json_data)
return _get_stripped_text_from_json_match(match) return _get_stripped_text_from_json_match(match)
if 'jq:' in json_filter: if json_filter.startswith("jq:") or json_filter.startswith("jqraw:"):
try: try:
import jq import jq
@@ -209,10 +207,15 @@ def _parse_json(json_data, json_filter):
# `jq` requires full compilation in windows and so isn't generally available # `jq` requires full compilation in windows and so isn't generally available
raise Exception("jq not support not found") raise Exception("jq not support not found")
jq_expression = jq.compile(json_filter.replace('jq:', '')) if json_filter.startswith("jq:"):
match = jq_expression.input(json_data).all() jq_expression = jq.compile(json_filter.removeprefix("jq:"))
match = jq_expression.input(json_data).all()
return _get_stripped_text_from_json_match(match)
return _get_stripped_text_from_json_match(match) if json_filter.startswith("jqraw:"):
jq_expression = jq.compile(json_filter.removeprefix("jqraw:"))
match = jq_expression.input(json_data).all()
return '\n'.join(str(item) for item in match)
def _get_stripped_text_from_json_match(match): def _get_stripped_text_from_json_match(match):
s = [] s = []

View File

@@ -5,6 +5,7 @@ from changedetectionio.notification import (
default_notification_title, default_notification_title,
) )
# Equal to or greater than this number of FilterNotFoundInResponse exceptions will trigger a filter-not-found notification
_FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT = 6 _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT = 6
DEFAULT_SETTINGS_HEADERS_USERAGENT='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36' DEFAULT_SETTINGS_HEADERS_USERAGENT='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'

View File

@@ -238,6 +238,8 @@ class model(dict):
if len(tmp_history): if len(tmp_history):
self.__newest_history_key = list(tmp_history.keys())[-1] self.__newest_history_key = list(tmp_history.keys())[-1]
else:
self.__newest_history_key = None
self.__history_n = len(tmp_history) self.__history_n = len(tmp_history)
@@ -328,14 +330,9 @@ class model(dict):
def save_history_text(self, contents, timestamp, snapshot_id): def save_history_text(self, contents, timestamp, snapshot_id):
import brotli import brotli
self.ensure_data_dir_exists() logger.trace(f"{self.get('uuid')} - Updating history.txt with timestamp {timestamp}")
# Small hack so that we sleep just enough to allow 1 second between history snapshots self.ensure_data_dir_exists()
# this is because history.txt indexes/keys snapshots by epoch seconds and we dont want dupe keys
if self.__newest_history_key and int(timestamp) == int(self.__newest_history_key):
logger.warning(f"Timestamp {timestamp} already exists, waiting 1 seconds so we have a unique key in history.txt")
timestamp = str(int(timestamp) + 1)
time.sleep(1)
threshold = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024)) threshold = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024))
skip_brotli = strtobool(os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False')) skip_brotli = strtobool(os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False'))
@@ -528,8 +525,42 @@ class model(dict):
# None is set # None is set
return False return False
def save_error_text(self, contents):
    """Write `contents` to ``last-error.txt`` inside this watch's data directory.

    The data directory is created first (via ``ensure_data_dir_exists``) so the
    write cannot fail on a missing folder. Any existing file is overwritten.
    """
    self.ensure_data_dir_exists()
    error_file = os.path.join(self.watch_data_dir, "last-error.txt")
    with open(error_file, 'w') as handle:
        handle.write(contents)
def get_last_fetched_before_filters(self): def save_xpath_data(self, data, as_error=False):
import json
if as_error:
target_path = os.path.join(self.watch_data_dir, "elements-error.json")
else:
target_path = os.path.join(self.watch_data_dir, "elements.json")
self.ensure_data_dir_exists()
with open(target_path, 'w') as f:
f.write(json.dumps(data))
f.close()
# Save as PNG, PNG is larger but better for doing visual diff in the future
def save_screenshot(self, screenshot: bytes, as_error=False):
    """Write raw screenshot bytes into this watch's data directory.

    :param screenshot: Image data, written verbatim in binary mode.
    :param as_error: When true the file is named ``last-error-screenshot.png``,
        otherwise ``last-screenshot.png``.
    """
    filename = "last-error-screenshot.png" if as_error else "last-screenshot.png"
    target_path = os.path.join(self.watch_data_dir, filename)

    self.ensure_data_dir_exists()

    # The context manager closes the file on exit; no explicit close() is needed.
    with open(target_path, 'wb') as f:
        f.write(screenshot)
def get_last_fetched_text_before_filters(self):
import brotli import brotli
filepath = os.path.join(self.watch_data_dir, 'last-fetched.br') filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
@@ -544,12 +575,56 @@ class model(dict):
with open(filepath, 'rb') as f: with open(filepath, 'rb') as f:
return(brotli.decompress(f.read()).decode('utf-8')) return(brotli.decompress(f.read()).decode('utf-8'))
def save_last_fetched_before_filters(self, contents): def save_last_text_fetched_before_filters(self, contents):
import brotli import brotli
filepath = os.path.join(self.watch_data_dir, 'last-fetched.br') filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
with open(filepath, 'wb') as f: with open(filepath, 'wb') as f:
f.write(brotli.compress(contents, mode=brotli.MODE_TEXT)) f.write(brotli.compress(contents, mode=brotli.MODE_TEXT))
# Store the fetched page source as "<timestamp>.html.br" in the watch data directory,
# then ask _prune_last_fetched_html_snapshots() to clean up older HTML snapshots.
def save_last_fetched_html(self, timestamp, contents):
import brotli
# Make sure the target directory exists before opening the file for writing
self.ensure_data_dir_exists()
snapshot_fname = f"{timestamp}.html.br"
filepath = os.path.join(self.watch_data_dir, snapshot_fname)
with open(filepath, 'wb') as f:
# str input is encoded to UTF-8 bytes; any other type is passed through unchanged
contents = contents.encode('utf-8') if isinstance(contents, str) else contents
try:
f.write(brotli.compress(contents))
except Exception as e:
# Compression failed: log it and fall back to writing the uncompressed bytes under the same ".br" filename
logger.warning(f"{self.get('uuid')} - Unable to compress snapshot, saving as raw data to {filepath}")
logger.warning(e)
f.write(contents)
# NOTE(review): indentation is lost in this view, so it is unclear whether the prune runs inside or after the `with` block - confirm.
self._prune_last_fetched_html_snapshots()
def get_fetched_html(self, timestamp):
    """Return the decoded contents of ``<timestamp>.html.br`` for this watch.

    The file is looked up in the watch data directory, brotli-decompressed and
    decoded as UTF-8. Returns ``False`` when no such file exists.
    """
    import brotli

    html_path = os.path.join(self.watch_data_dir, f"{timestamp}.html.br")
    if not os.path.isfile(html_path):
        return False

    with open(html_path, 'rb') as snapshot:
        compressed = snapshot.read()
    return brotli.decompress(compressed).decode('utf-8')
def _prune_last_fetched_html_snapshots(self):
    """Remove stored ``<timestamp>.html.br`` files for all but the last two keys of ``self.history``.

    Keys are walked in reverse order; the first two positions are kept and any
    later one has its HTML snapshot file deleted if it is present on disk.
    """
    newest_first = list(self.history.keys())[::-1]
    for stale_timestamp in newest_first[2:]:
        stale_path = os.path.join(self.watch_data_dir, f"{stale_timestamp}.html.br")
        if os.path.isfile(stale_path):
            os.remove(stale_path)
@property @property
def get_browsersteps_available_screenshots(self): def get_browsersteps_available_screenshots(self):
"For knowing which screenshots are available to show the user in BrowserSteps UI" "For knowing which screenshots are available to show the user in BrowserSteps UI"

View File

@@ -1,5 +1,6 @@
from abc import abstractmethod from abc import abstractmethod
from changedetectionio.strtobool import strtobool from changedetectionio.strtobool import strtobool
from changedetectionio.model import Watch
from copy import deepcopy from copy import deepcopy
from loguru import logger from loguru import logger
import hashlib import hashlib
@@ -138,7 +139,7 @@ class difference_detection_processor():
# After init, call run_changedetection() which will do the actual change-detection # After init, call run_changedetection() which will do the actual change-detection
@abstractmethod @abstractmethod
def run_changedetection(self, uuid, skip_when_checksum_same=True): def run_changedetection(self, watch: Watch, skip_when_checksum_same=True):
update_obj = {'last_notification_error': False, 'last_error': False} update_obj = {'last_notification_error': False, 'last_error': False}
some_data = 'xxxxx' some_data = 'xxxxx'
update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest() update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()

View File

@@ -1,6 +1,5 @@
from . import difference_detection_processor from . import difference_detection_processor
from copy import deepcopy
from loguru import logger from loguru import logger
import hashlib import hashlib
import urllib3 import urllib3
@@ -20,10 +19,7 @@ class perform_site_check(difference_detection_processor):
screenshot = None screenshot = None
xpath_data = None xpath_data = None
def run_changedetection(self, uuid, skip_when_checksum_same=True): def run_changedetection(self, watch, skip_when_checksum_same=True):
# DeepCopy so we can be sure we don't accidently change anything by reference
watch = deepcopy(self.datastore.data['watching'].get(uuid))
if not watch: if not watch:
raise Exception("Watch no longer exists.") raise Exception("Watch no longer exists.")
@@ -44,13 +40,13 @@ class perform_site_check(difference_detection_processor):
fetched_md5 = hashlib.md5(self.fetcher.instock_data.encode('utf-8')).hexdigest() fetched_md5 = hashlib.md5(self.fetcher.instock_data.encode('utf-8')).hexdigest()
# 'Possibly in stock' comes from stock-not-in-stock.js when no string found above the fold. # 'Possibly in stock' comes from stock-not-in-stock.js when no string found above the fold.
update_obj["in_stock"] = True if self.fetcher.instock_data == 'Possibly in stock' else False update_obj["in_stock"] = True if self.fetcher.instock_data == 'Possibly in stock' else False
logger.debug(f"Watch UUID {uuid} restock check returned '{self.fetcher.instock_data}' from JS scraper.") logger.debug(f"Watch UUID {watch.get('uuid')} restock check returned '{self.fetcher.instock_data}' from JS scraper.")
else: else:
raise UnableToExtractRestockData(status_code=self.fetcher.status_code) raise UnableToExtractRestockData(status_code=self.fetcher.status_code)
# The main thing that all this at the moment comes down to :) # The main thing that all this at the moment comes down to :)
changed_detected = False changed_detected = False
logger.debug(f"Watch UUID {uuid} restock check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}") logger.debug(f"Watch UUID {watch.get('uuid')} restock check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")
if watch.get('previous_md5') and watch.get('previous_md5') != fetched_md5: if watch.get('previous_md5') and watch.get('previous_md5') != fetched_md5:
# Yes if we only care about it going to instock, AND we are in stock # Yes if we only care about it going to instock, AND we are in stock

View File

@@ -10,18 +10,18 @@ from . import difference_detection_processor
from ..html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text from ..html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text
from changedetectionio import html_tools, content_fetchers from changedetectionio import html_tools, content_fetchers
from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
import changedetectionio.content_fetchers
from copy import deepcopy
from loguru import logger from loguru import logger
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
name = 'Webpage Text/HTML, JSON and PDF changes' name = 'Webpage Text/HTML, JSON and PDF changes'
description = 'Detects all text changes where possible' description = 'Detects all text changes where possible'
json_filter_prefixes = ['json:', 'jq:'] json_filter_prefixes = ['json:', 'jq:', 'jqraw:']
class FilterNotFoundInResponse(ValueError): class FilterNotFoundInResponse(ValueError):
def __init__(self, msg): def __init__(self, msg, screenshot=None, xpath_data=None):
self.screenshot = screenshot
self.xpath_data = xpath_data
ValueError.__init__(self, msg) ValueError.__init__(self, msg)
@@ -34,14 +34,12 @@ class PDFToHTMLToolNotFound(ValueError):
# (set_proxy_from_list) # (set_proxy_from_list)
class perform_site_check(difference_detection_processor): class perform_site_check(difference_detection_processor):
def run_changedetection(self, uuid, skip_when_checksum_same=True): def run_changedetection(self, watch, skip_when_checksum_same=True):
changed_detected = False changed_detected = False
html_content = "" html_content = ""
screenshot = False # as bytes screenshot = False # as bytes
stripped_text_from_html = "" stripped_text_from_html = ""
# DeepCopy so we can be sure we don't accidently change anything by reference
watch = deepcopy(self.datastore.data['watching'].get(uuid))
if not watch: if not watch:
raise Exception("Watch no longer exists.") raise Exception("Watch no longer exists.")
@@ -116,12 +114,12 @@ class perform_site_check(difference_detection_processor):
# Better would be if Watch.model could access the global data also # Better would be if Watch.model could access the global data also
# and then use getattr https://docs.python.org/3/reference/datamodel.html#object.__getitem__ # and then use getattr https://docs.python.org/3/reference/datamodel.html#object.__getitem__
# https://realpython.com/inherit-python-dict/ instead of doing it procedurely # https://realpython.com/inherit-python-dict/ instead of doing it procedurely
include_filters_from_tags = self.datastore.get_tag_overrides_for_watch(uuid=uuid, attr='include_filters') include_filters_from_tags = self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='include_filters')
# 1845 - remove duplicated filters in both group and watch include filter # 1845 - remove duplicated filters in both group and watch include filter
include_filters_rule = list(dict.fromkeys(watch.get('include_filters', []) + include_filters_from_tags)) include_filters_rule = list(dict.fromkeys(watch.get('include_filters', []) + include_filters_from_tags))
subtractive_selectors = [*self.datastore.get_tag_overrides_for_watch(uuid=uuid, attr='subtractive_selectors'), subtractive_selectors = [*self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='subtractive_selectors'),
*watch.get("subtractive_selectors", []), *watch.get("subtractive_selectors", []),
*self.datastore.data["settings"]["application"].get("global_subtractive_selectors", []) *self.datastore.data["settings"]["application"].get("global_subtractive_selectors", [])
] ]
@@ -188,7 +186,7 @@ class perform_site_check(difference_detection_processor):
append_pretty_line_formatting=not watch.is_source_type_url) append_pretty_line_formatting=not watch.is_source_type_url)
if not html_content.strip(): if not html_content.strip():
raise FilterNotFoundInResponse(include_filters_rule) raise FilterNotFoundInResponse(msg=include_filters_rule, screenshot=self.fetcher.screenshot, xpath_data=self.fetcher.xpath_data)
if has_subtractive_selectors: if has_subtractive_selectors:
html_content = html_tools.element_removal(subtractive_selectors, html_content) html_content = html_tools.element_removal(subtractive_selectors, html_content)
@@ -222,7 +220,7 @@ class perform_site_check(difference_detection_processor):
from .. import diff from .. import diff
# needs to not include (added) etc or it may get used twice # needs to not include (added) etc or it may get used twice
# Replace the processed text with the preferred result # Replace the processed text with the preferred result
rendered_diff = diff.render_diff(previous_version_file_contents=watch.get_last_fetched_before_filters(), rendered_diff = diff.render_diff(previous_version_file_contents=watch.get_last_fetched_text_before_filters(),
newest_version_file_contents=stripped_text_from_html, newest_version_file_contents=stripped_text_from_html,
include_equal=False, # not the same lines include_equal=False, # not the same lines
include_added=watch.get('filter_text_added', True), include_added=watch.get('filter_text_added', True),
@@ -231,7 +229,7 @@ class perform_site_check(difference_detection_processor):
line_feed_sep="\n", line_feed_sep="\n",
include_change_type_prefix=False) include_change_type_prefix=False)
watch.save_last_fetched_before_filters(text_content_before_ignored_filter) watch.save_last_text_fetched_before_filters(text_content_before_ignored_filter)
if not rendered_diff and stripped_text_from_html: if not rendered_diff and stripped_text_from_html:
# We had some content, but no differences were found # We had some content, but no differences were found
@@ -246,9 +244,10 @@ class perform_site_check(difference_detection_processor):
if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0: if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0:
raise content_fetchers.exceptions.ReplyWithContentButNoText(url=url, raise content_fetchers.exceptions.ReplyWithContentButNoText(url=url,
status_code=self.fetcher.get_last_status_code(), status_code=self.fetcher.get_last_status_code(),
screenshot=screenshot, screenshot=self.fetcher.screenshot,
has_filters=has_filter_rule, has_filters=has_filter_rule,
html_content=html_content html_content=html_content,
xpath_data=self.fetcher.xpath_data
) )
# We rely on the actual text in the html output.. many sites have random script vars etc, # We rely on the actual text in the html output.. many sites have random script vars etc,
@@ -344,17 +343,17 @@ class perform_site_check(difference_detection_processor):
if not watch['title'] or not len(watch['title']): if not watch['title'] or not len(watch['title']):
update_obj['title'] = html_tools.extract_element(find='title', html_content=self.fetcher.content) update_obj['title'] = html_tools.extract_element(find='title', html_content=self.fetcher.content)
logger.debug(f"Watch UUID {uuid} content check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}") logger.debug(f"Watch UUID {watch.get('uuid')} content check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")
if changed_detected: if changed_detected:
if watch.get('check_unique_lines', False): if watch.get('check_unique_lines', False):
has_unique_lines = watch.lines_contain_something_unique_compared_to_history(lines=stripped_text_from_html.splitlines()) has_unique_lines = watch.lines_contain_something_unique_compared_to_history(lines=stripped_text_from_html.splitlines())
# One or more lines? unsure? # One or more lines? unsure?
if not has_unique_lines: if not has_unique_lines:
logger.debug(f"check_unique_lines: UUID {uuid} didnt have anything new setting change_detected=False") logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} didnt have anything new setting change_detected=False")
changed_detected = False changed_detected = False
else: else:
logger.debug(f"check_unique_lines: UUID {uuid} had unique content") logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} had unique content")
# Always record the new checksum # Always record the new checksum
update_obj["previous_md5"] = fetched_md5 update_obj["previous_md5"] = fetched_md5

View File

@@ -1,14 +1,5 @@
$(document).ready(function () { $(document).ready(function () {
// duplicate
var csrftoken = $('input[name=csrf_token]').val();
$.ajaxSetup({
beforeSend: function (xhr, settings) {
if (!/^(GET|HEAD|OPTIONS|TRACE)$/i.test(settings.type) && !this.crossDomain) {
xhr.setRequestHeader("X-CSRFToken", csrftoken)
}
}
})
var browsersteps_session_id; var browsersteps_session_id;
var browser_interface_seconds_remaining = 0; var browser_interface_seconds_remaining = 0;
var apply_buttons_disabled = false; var apply_buttons_disabled = false;

View File

@@ -0,0 +1,10 @@
$(document).ready(function () {
    // Requests using these HTTP methods are sent without the CSRF header.
    const safeMethodPattern = /^(GET|HEAD|OPTIONS|TRACE)$/i;

    $.ajaxSetup({
        // Kept as a regular function (not an arrow) because `this.crossDomain` is read below.
        beforeSend: function (xhr, settings) {
            if (safeMethodPattern.test(settings.type) || this.crossDomain) {
                return;
            }
            // NOTE(review): `csrftoken` is not declared in this file; presumably a global supplied by the page - confirm.
            xhr.setRequestHeader("X-CSRFToken", csrftoken)
        }
    })
});

View File

@@ -1,13 +1,4 @@
$(document).ready(function () { $(document).ready(function () {
var csrftoken = $('input[name=csrf_token]').val();
$.ajaxSetup({
beforeSend: function (xhr, settings) {
if (!/^(GET|HEAD|OPTIONS|TRACE)$/i.test(settings.type) && !this.crossDomain) {
xhr.setRequestHeader("X-CSRFToken", csrftoken)
}
}
})
$('.needs-localtime').each(function () { $('.needs-localtime').each(function () {
for (var option of this.options) { for (var option of this.options) {
var dateObject = new Date(option.value * 1000); var dateObject = new Date(option.value * 1000);
@@ -48,6 +39,12 @@ $(document).ready(function () {
$("#highlightSnippet").remove(); $("#highlightSnippet").remove();
} }
// Listen for Escape key press
window.addEventListener('keydown', function (e) {
if (e.key === 'Escape') {
clean();
}
}, false);
function dragTextHandler(event) { function dragTextHandler(event) {
console.log('mouseupped'); console.log('mouseupped');

View File

@@ -13,16 +13,6 @@ $(document).ready(function() {
$('#send-test-notification').click(function (e) { $('#send-test-notification').click(function (e) {
e.preventDefault(); e.preventDefault();
// this can be global
var csrftoken = $('input[name=csrf_token]').val();
$.ajaxSetup({
beforeSend: function(xhr, settings) {
if (!/^(GET|HEAD|OPTIONS|TRACE)$/i.test(settings.type) && !this.crossDomain) {
xhr.setRequestHeader("X-CSRFToken", csrftoken)
}
}
})
data = { data = {
notification_body: $('#notification_body').val(), notification_body: $('#notification_body').val(),
notification_format: $('#notification_format').val(), notification_format: $('#notification_format').val(),

View File

@@ -2,250 +2,258 @@
// All rights reserved. // All rights reserved.
// yes - this is really a hack, if you are a front-ender and want to help, please get in touch! // yes - this is really a hack, if you are a front-ender and want to help, please get in touch!
$(document).ready(function () { let runInClearMode = false;
var current_selected_i; $(document).ready(() => {
var state_clicked = false; let currentSelections = [];
let currentSelection = null;
let appendToList = false;
let c, xctx, ctx;
let xScale = 1, yScale = 1;
let selectorImage, selectorImageRect, selectorData;
var c;
// greyed out fill context // Global jQuery selectors with "Elem" appended
var xctx; const $selectorCanvasElem = $('#selector-canvas');
// redline highlight context const $includeFiltersElem = $("#include_filters");
var ctx; const $selectorBackgroundElem = $("img#selector-background");
const $selectorCurrentXpathElem = $("#selector-current-xpath span");
const $fetchingUpdateNoticeElem = $('.fetching-update-notice');
const $selectorWrapperElem = $("#selector-wrapper");
var current_default_xpath = []; // Color constants
var x_scale = 1; const FILL_STYLE_HIGHLIGHT = 'rgba(205,0,0,0.35)';
var y_scale = 1; const FILL_STYLE_GREYED_OUT = 'rgba(205,205,205,0.95)';
var selector_image; const STROKE_STYLE_HIGHLIGHT = 'rgba(255,0,0, 0.9)';
var selector_image_rect; const FILL_STYLE_REDLINE = 'rgba(255,0,0, 0.1)';
var selector_data; const STROKE_STYLE_REDLINE = 'rgba(225,0,0,0.9)';
$('#visualselector-tab').click(function () { $('#visualselector-tab').click(() => {
$("img#selector-background").off('load'); $selectorBackgroundElem.off('load');
state_clicked = false; currentSelections = [];
current_selected_i = false; bootstrapVisualSelector();
bootstrap_visualselector();
}); });
$(document).on('keydown', function (event) { function clearReset() {
if ($("img#selector-background").is(":visible")) { ctx.clearRect(0, 0, c.width, c.height);
if (event.key == "Escape") {
state_clicked = false; if ($includeFiltersElem.val().length) {
ctx.clearRect(0, 0, c.width, c.height); alert("Existing filters under the 'Filters & Triggers' tab were cleared.");
}
$includeFiltersElem.val('');
currentSelections = [];
// Means we ignore the xpaths from the scraper marked as sel.highlight_as_custom_filter (it matched a previous selector)
runInClearMode = true;
highlightCurrentSelected();
}
function splitToList(v) {
    // Break the text into lines, strip surrounding whitespace and drop lines that end up empty.
    const entries = [];
    for (const rawLine of v.split('\n')) {
        const trimmed = rawLine.trim();
        if (trimmed.length > 0) {
            entries.push(trimmed);
        }
    }
    return entries;
}
function sortScrapedElementsBySize() {
    // Reorder selectorData['size_pos'] in place so the element with the largest
    // area (width * height) comes first and the smallest comes last.
    const areaOf = (element) => element.width * element.height;
    selectorData['size_pos'].sort((first, second) => areaOf(second) - areaOf(first));
}
$(document).on('keydown keyup', (event) => {
if (event.code === 'ShiftLeft' || event.code === 'ShiftRight') {
appendToList = event.type === 'keydown';
}
if (event.type === 'keydown') {
if ($selectorBackgroundElem.is(":visible") && event.key === "Escape") {
clearReset();
} }
} }
}); });
// For when the page loads $('#clear-selector').on('click', () => {
if (!window.location.hash || window.location.hash != '#visualselector') { clearReset();
$("img#selector-background").attr('src', ''); });
// So if they start switching between visualSelector and manual filters, stop it from rendering old filters
$('li.tab a').on('click', () => {
runInClearMode = true;
});
if (!window.location.hash || window.location.hash !== '#visualselector') {
$selectorBackgroundElem.attr('src', '');
return; return;
} }
// Handle clearing button/link bootstrapVisualSelector();
$('#clear-selector').on('click', function (event) {
if (!state_clicked) {
alert('Oops, Nothing selected!');
}
state_clicked = false;
ctx.clearRect(0, 0, c.width, c.height);
xctx.clearRect(0, 0, c.width, c.height);
$("#include_filters").val('');
});
function bootstrapVisualSelector() {
bootstrap_visualselector(); $selectorBackgroundElem
.on("error", () => {
$fetchingUpdateNoticeElem.html("<strong>Ooops!</strong> The VisualSelector tool needs at least one fetched page, please unpause the watch and/or wait for the watch to complete fetching and then reload this page.")
function bootstrap_visualselector() { .css('color', '#bb0000');
if (1) { $('#selector-current-xpath, #clear-selector').hide();
// bootstrap it, this will trigger everything else })
$("img#selector-background").on("error", function () { .on('load', () => {
$('.fetching-update-notice').html("<strong>Ooops!</strong> The VisualSelector tool needs atleast one fetched page, please unpause the watch and/or wait for the watch to complete fetching and then reload this page.");
$('.fetching-update-notice').css('color','#bb0000');
$('#selector-current-xpath').hide();
$('#clear-selector').hide();
}).bind('load', function () {
console.log("Loaded background..."); console.log("Loaded background...");
c = document.getElementById("selector-canvas"); c = document.getElementById("selector-canvas");
// greyed out fill context
xctx = c.getContext("2d"); xctx = c.getContext("2d");
// redline highlight context
ctx = c.getContext("2d"); ctx = c.getContext("2d");
if ($("#include_filters").val().trim().length) { fetchData();
current_default_xpath = $("#include_filters").val().split(/\r?\n/g); $selectorCanvasElem.off("mousemove mousedown");
} else { })
current_default_xpath = []; .attr("src", screenshot_url);
}
fetch_data(); let s = `${$selectorBackgroundElem.attr('src')}?${new Date().getTime()}`;
$('#selector-canvas').off("mousemove mousedown"); $selectorBackgroundElem.attr('src', s);
// screenshot_url defined in the edit.html template
}).attr("src", screenshot_url);
}
// Tell visualSelector that the image should update
var s = $("img#selector-background").attr('src') + "?" + new Date().getTime();
$("img#selector-background").attr('src', s)
} }
// This is fired once the img src is loaded in bootstrap_visualselector() function alertIfFilterNotFound() {
function fetch_data() { let existingFilters = splitToList($includeFiltersElem.val());
// Image is ready let sizePosXpaths = selectorData['size_pos'].map(sel => sel.xpath);
$('.fetching-update-notice').html("Fetching element data..");
for (let filter of existingFilters) {
if (!sizePosXpaths.includes(filter)) {
alert(`One or more of your existing filters was not found and will be removed when a new filter is selected.`);
break;
}
}
}
function fetchData() {
$fetchingUpdateNoticeElem.html("Fetching element data..");
$.ajax({ $.ajax({
url: watch_visual_selector_data_url, url: watch_visual_selector_data_url,
context: document.body context: document.body
}).done(function (data) { }).done((data) => {
$('.fetching-update-notice').html("Rendering.."); $fetchingUpdateNoticeElem.html("Rendering..");
selector_data = data; selectorData = data;
console.log("Reported browser width from backend: " + data['browser_width']); sortScrapedElementsBySize();
state_clicked = false; console.log(`Reported browser width from backend: ${data['browser_width']}`);
set_scale();
reflow_selector();
$('.fetching-update-notice').fadeOut();
});
// Little sanity check for the user, alert them if something missing
alertIfFilterNotFound();
setScale();
reflowSelector();
$fetchingUpdateNoticeElem.fadeOut();
});
} }
function updateFiltersText() {
// Assuming currentSelections is already defined and contains the selections
let uniqueSelections = new Set(currentSelections.map(sel => (sel[0] === '/' ? `xpath:${sel.xpath}` : sel.xpath)));
function set_scale() { if (currentSelections.length > 0) {
// Convert the Set back to an array and join with newline characters
// some things to check if the scaling doesnt work let textboxFilterText = Array.from(uniqueSelections).join("\n");
// - that the widths/sizes really are about the actual screen size cat elements.json |grep -o width......|sort|uniq $includeFiltersElem.val(textboxFilterText);
$("#selector-wrapper").show();
selector_image = $("img#selector-background")[0];
selector_image_rect = selector_image.getBoundingClientRect();
// make the canvas the same size as the image
$('#selector-canvas').attr('height', selector_image_rect.height);
$('#selector-canvas').attr('width', selector_image_rect.width);
$('#selector-wrapper').attr('width', selector_image_rect.width);
x_scale = selector_image_rect.width / selector_data['browser_width'];
y_scale = selector_image_rect.height / selector_image.naturalHeight;
ctx.strokeStyle = 'rgba(255,0,0, 0.9)';
ctx.fillStyle = 'rgba(255,0,0, 0.1)';
ctx.lineWidth = 3;
console.log("scaling set x: " + x_scale + " by y:" + y_scale);
$("#selector-current-xpath").css('max-width', selector_image_rect.width);
}
function reflow_selector() {
$(window).resize(function () {
set_scale();
highlight_current_selected_i();
});
var selector_currnt_xpath_text = $("#selector-current-xpath span");
set_scale();
console.log(selector_data['size_pos'].length + " selectors found");
// highlight the default one if we can find it in the xPath list
// or the xpath matches the default one
found = false;
if (current_default_xpath.length) {
// Find the first one that matches
// @todo In the future paint all that match
for (const c of current_default_xpath) {
for (var i = selector_data['size_pos'].length; i !== 0; i--) {
if (selector_data['size_pos'][i - 1].xpath.trim() === c.trim()) {
console.log("highlighting " + c);
current_selected_i = i - 1;
highlight_current_selected_i();
found = true;
break;
}
}
if (found) {
break;
}
}
if (!found) {
alert("Unfortunately your existing CSS/xPath Filter was no longer found!");
}
} }
}
function setScale() {
$selectorWrapperElem.show();
selectorImage = $selectorBackgroundElem[0];
selectorImageRect = selectorImage.getBoundingClientRect();
$('#selector-canvas').bind('mousemove', function (e) { $selectorCanvasElem.attr({
if (state_clicked) { 'height': selectorImageRect.height,
return; 'width': selectorImageRect.width
});
$selectorWrapperElem.attr('width', selectorImageRect.width);
$('#visual-selector-heading').css('max-width', selectorImageRect.width + "px")
xScale = selectorImageRect.width / selectorImage.naturalWidth;
yScale = selectorImageRect.height / selectorImage.naturalHeight;
ctx.strokeStyle = STROKE_STYLE_HIGHLIGHT;
ctx.fillStyle = FILL_STYLE_REDLINE;
ctx.lineWidth = 3;
console.log("Scaling set x: " + xScale + " by y:" + yScale);
$("#selector-current-xpath").css('max-width', selectorImageRect.width);
}
function reflowSelector() {
$(window).resize(() => {
setScale();
highlightCurrentSelected();
});
setScale();
console.log(selectorData['size_pos'].length + " selectors found");
let existingFilters = splitToList($includeFiltersElem.val());
selectorData['size_pos'].forEach(sel => {
if ((!runInClearMode && sel.highlight_as_custom_filter) || existingFilters.includes(sel.xpath)) {
console.log("highlighting " + c);
currentSelections.push(sel);
} }
ctx.clearRect(0, 0, c.width, c.height); });
current_selected_i = null;
// Add in offset
if ((typeof e.offsetX === "undefined" || typeof e.offsetY === "undefined") || (e.offsetX === 0 && e.offsetY === 0)) { highlightCurrentSelected();
var targetOffset = $(e.target).offset(); updateFiltersText();
$selectorCanvasElem.bind('mousemove', handleMouseMove.debounce(5));
$selectorCanvasElem.bind('mousedown', handleMouseDown.debounce(5));
$selectorCanvasElem.bind('mouseleave', highlightCurrentSelected.debounce(5));
function handleMouseMove(e) {
if (!e.offsetX && !e.offsetY) {
const targetOffset = $(e.target).offset();
e.offsetX = e.pageX - targetOffset.left; e.offsetX = e.pageX - targetOffset.left;
e.offsetY = e.pageY - targetOffset.top; e.offsetY = e.pageY - targetOffset.top;
} }
// Reverse order - the most specific one should be deeper/"laster" ctx.fillStyle = FILL_STYLE_HIGHLIGHT;
// Basically, find the most 'deepest'
var found = 0;
ctx.fillStyle = 'rgba(205,0,0,0.35)';
// Will be sorted by smallest width*height first
for (var i = 0; i <= selector_data['size_pos'].length; i++) {
// draw all of them? let them choose somehow?
var sel = selector_data['size_pos'][i];
// If we are in a bounding-box
if (e.offsetY > sel.top * y_scale && e.offsetY < sel.top * y_scale + sel.height * y_scale
&&
e.offsetX > sel.left * y_scale && e.offsetX < sel.left * y_scale + sel.width * y_scale
) { selectorData['size_pos'].forEach(sel => {
if (e.offsetY > sel.top * yScale && e.offsetY < sel.top * yScale + sel.height * yScale &&
// FOUND ONE e.offsetX > sel.left * yScale && e.offsetX < sel.left * yScale + sel.width * yScale) {
set_current_selected_text(sel.xpath); setCurrentSelectedText(sel.xpath);
ctx.strokeRect(sel.left * x_scale, sel.top * y_scale, sel.width * x_scale, sel.height * y_scale); drawHighlight(sel);
ctx.fillRect(sel.left * x_scale, sel.top * y_scale, sel.width * x_scale, sel.height * y_scale); currentSelections.push(sel);
currentSelection = sel;
// no need to keep digging highlightCurrentSelected();
// @todo or, O to go out/up, I to go in currentSelections.pop();
// or double click to go up/out the selector?
current_selected_i = i;
found += 1;
break;
} }
} })
}.debounce(5));
function set_current_selected_text(s) {
selector_currnt_xpath_text[0].innerHTML = s;
}
function highlight_current_selected_i() {
if (state_clicked) {
state_clicked = false;
xctx.clearRect(0, 0, c.width, c.height);
return;
}
var sel = selector_data['size_pos'][current_selected_i];
if (sel[0] == '/') {
// @todo - not sure just checking / is right
$("#include_filters").val('xpath:' + sel.xpath);
} else {
$("#include_filters").val(sel.xpath);
}
xctx.fillStyle = 'rgba(205,205,205,0.95)';
xctx.strokeStyle = 'rgba(225,0,0,0.9)';
xctx.lineWidth = 3;
xctx.fillRect(0, 0, c.width, c.height);
// Clear out what only should be seen (make a clear/clean spot)
xctx.clearRect(sel.left * x_scale, sel.top * y_scale, sel.width * x_scale, sel.height * y_scale);
xctx.strokeRect(sel.left * x_scale, sel.top * y_scale, sel.width * x_scale, sel.height * y_scale);
state_clicked = true;
set_current_selected_text(sel.xpath);
} }
$('#selector-canvas').bind('mousedown', function (e) { function setCurrentSelectedText(s) {
highlight_current_selected_i(); $selectorCurrentXpathElem[0].innerHTML = s;
}); }
function drawHighlight(sel) {
    // Scale the element's box by xScale/yScale once, then outline and fill it
    // on the hover canvas using whatever stroke/fill styles are currently set on ctx.
    const rect = [sel.left * xScale, sel.top * yScale, sel.width * xScale, sel.height * yScale];
    ctx.strokeRect(...rect);
    ctx.fillRect(...rect);
}
function handleMouseDown() {
    // In append mode the element under the cursor is added to the existing
    // selection list; otherwise it replaces the list entirely.
    if (appendToList) {
        currentSelections = [...currentSelections, currentSelection];
    } else {
        currentSelections = [currentSelection];
    }

    // Redraw the outlines for the new selection set, then refresh the filters text.
    highlightCurrentSelected();
    updateFiltersText();
}
} }
function highlightCurrentSelected() {
    // Start from a blank overlay so outlines from a previous selection set do not linger.
    xctx.clearRect(0, 0, c.width, c.height);

    xctx.lineWidth = 3;
    xctx.strokeStyle = STROKE_STYLE_REDLINE;
    // The fill style is set but nothing is filled here; only outlines are drawn.
    xctx.fillStyle = FILL_STYLE_GREYED_OUT;

    // Outline every selected element, scaled by xScale/yScale.
    currentSelections.forEach(({left, top, width, height}) => {
        xctx.strokeRect(left * xScale, top * yScale, width * xScale, height * yScale);
    });
}
}); });

View File

@@ -1,6 +1,8 @@
#selector-wrapper { #selector-wrapper {
height: 100%; height: 100%;
text-align: center;
max-height: 70vh; max-height: 70vh;
overflow-y: scroll; overflow-y: scroll;
position: relative; position: relative;

View File

@@ -671,14 +671,25 @@ footer {
and also iPads specifically. and also iPads specifically.
*/ */
.watch-table { .watch-table {
/* make headings work on mobile */
thead {
display: block;
tr {
th {
display: inline-block;
}
}
.empty-cell {
display: none;
}
}
/* Force table to not be like tables anymore */ /* Force table to not be like tables anymore */
thead, tbody {
tbody, td,
th, tr {
td, display: block;
tr { }
display: block;
} }
.last-checked { .last-checked {
@@ -702,13 +713,6 @@ footer {
display: inline-block; display: inline-block;
} }
/* Hide table headers (but not display: none;, for accessibility) */
thead tr {
position: absolute;
top: -9999px;
left: -9999px;
}
.pure-table td, .pure-table td,
.pure-table th { .pure-table th {
border: none; border: none;
@@ -753,6 +757,7 @@ footer {
thead { thead {
background-color: var(--color-background-table-thead); background-color: var(--color-background-table-thead);
color: var(--color-text); color: var(--color-text);
border-bottom: 1px solid var(--color-background-table-thead);
} }
td, td,

View File

@@ -863,14 +863,17 @@ footer {
and also iPads specifically. and also iPads specifically.
*/ */
.watch-table { .watch-table {
/* make headings work on mobile */
/* Force table to not be like tables anymore */ /* Force table to not be like tables anymore */
/* Force table to not be like tables anymore */ /* Force table to not be like tables anymore */ }
/* Hide table headers (but not display: none;, for accessibility) */ } .watch-table thead {
.watch-table thead, display: block; }
.watch-table tbody, .watch-table thead tr th {
.watch-table th, display: inline-block; }
.watch-table td, .watch-table thead .empty-cell {
.watch-table tr { display: none; }
.watch-table tbody td,
.watch-table tbody tr {
display: block; } display: block; }
.watch-table .last-checked > span { .watch-table .last-checked > span {
vertical-align: middle; } vertical-align: middle; }
@@ -882,10 +885,6 @@ footer {
content: "Last Changed "; } content: "Last Changed "; }
.watch-table td.inline { .watch-table td.inline {
display: inline-block; } display: inline-block; }
.watch-table thead tr {
position: absolute;
top: -9999px;
left: -9999px; }
.watch-table .pure-table td, .watch-table .pure-table td,
.watch-table .pure-table th { .watch-table .pure-table th {
border: none; } border: none; }
@@ -912,7 +911,8 @@ footer {
border-color: var(--color-border-table-cell); } border-color: var(--color-border-table-cell); }
.pure-table thead { .pure-table thead {
background-color: var(--color-background-table-thead); background-color: var(--color-background-table-thead);
color: var(--color-text); } color: var(--color-text);
border-bottom: 1px solid var(--color-background-table-thead); }
.pure-table td, .pure-table td,
.pure-table th { .pure-table th {
border-left-color: var(--color-border-table-cell); } border-left-color: var(--color-border-table-cell); }
@@ -1065,6 +1065,7 @@ ul {
#selector-wrapper { #selector-wrapper {
height: 100%; height: 100%;
text-align: center;
max-height: 70vh; max-height: 70vh;
overflow-y: scroll; overflow-y: scroll;
position: relative; } position: relative; }

View File

@@ -163,7 +163,6 @@ class ChangeDetectionStore:
del (update_obj[dict_key]) del (update_obj[dict_key])
self.__data['watching'][uuid].update(update_obj) self.__data['watching'][uuid].update(update_obj)
self.needs_write = True self.needs_write = True
@property @property
@@ -243,6 +242,14 @@ class ChangeDetectionStore:
def clear_watch_history(self, uuid): def clear_watch_history(self, uuid):
import pathlib import pathlib
# JSON Data, Screenshots, Textfiles (history index and snapshots), HTML in the future etc
for item in pathlib.Path(os.path.join(self.datastore_path, uuid)).rglob("*.*"):
unlink(item)
# Force the attr to recalculate
bump = self.__data['watching'][uuid].history
# Do this last because it will trigger a recheck due to last_checked being zero
self.__data['watching'][uuid].update({ self.__data['watching'][uuid].update({
'browser_steps_last_error_step' : None, 'browser_steps_last_error_step' : None,
'check_count': 0, 'check_count': 0,
@@ -259,13 +266,6 @@ class ChangeDetectionStore:
'track_ldjson_price_data': None, 'track_ldjson_price_data': None,
}) })
# JSON Data, Screenshots, Textfiles (history index and snapshots), HTML in the future etc
for item in pathlib.Path(os.path.join(self.datastore_path, uuid)).rglob("*.*"):
unlink(item)
# Force the attr to recalculate
bump = self.__data['watching'][uuid].history
self.needs_write_urgent = True self.needs_write_urgent = True
def add_watch(self, url, tag='', extras=None, tag_uuids=None, write_to_disk_now=True): def add_watch(self, url, tag='', extras=None, tag_uuids=None, write_to_disk_now=True):
@@ -376,46 +376,6 @@ class ChangeDetectionStore:
return False return False
# Save as PNG, PNG is larger but better for doing visual diff in the future
def save_screenshot(self, watch_uuid, screenshot: bytes, as_error=False):
if not self.data['watching'].get(watch_uuid):
return
if as_error:
target_path = os.path.join(self.datastore_path, watch_uuid, "last-error-screenshot.png")
else:
target_path = os.path.join(self.datastore_path, watch_uuid, "last-screenshot.png")
self.data['watching'][watch_uuid].ensure_data_dir_exists()
with open(target_path, 'wb') as f:
f.write(screenshot)
f.close()
def save_error_text(self, watch_uuid, contents):
if not self.data['watching'].get(watch_uuid):
return
self.data['watching'][watch_uuid].ensure_data_dir_exists()
target_path = os.path.join(self.datastore_path, watch_uuid, "last-error.txt")
with open(target_path, 'w') as f:
f.write(contents)
def save_xpath_data(self, watch_uuid, data, as_error=False):
if not self.data['watching'].get(watch_uuid):
return
if as_error:
target_path = os.path.join(self.datastore_path, watch_uuid, "elements-error.json")
else:
target_path = os.path.join(self.datastore_path, watch_uuid, "elements.json")
self.data['watching'][watch_uuid].ensure_data_dir_exists()
with open(target_path, 'w') as f:
f.write(json.dumps(data))
f.close()
def sync_to_json(self): def sync_to_json(self):
logger.info("Saving JSON..") logger.info("Saving JSON..")
try: try:
@@ -884,3 +844,8 @@ class ChangeDetectionStore:
# Something custom here # Something custom here
self.__data["watching"][uuid]['time_between_check_use_default'] = False self.__data["watching"][uuid]['time_between_check_use_default'] = False
# Correctly set datatype for older installs where 'tag' was string and update_12 did not catch it
def update_16(self):
    """Reset 'tags' to an empty list on every watch where it is still a plain string.

    Older installs could carry a string in this field; any such value is
    discarded and replaced with ``[]``. Watches whose 'tags' is missing or
    already a non-string value are left untouched.
    """
    watches = self.data['watching']
    for uuid in watches:
        if isinstance(watches[uuid].get('tags'), str):
            watches[uuid]['tags'] = []

View File

@@ -26,7 +26,11 @@
<meta name="msapplication-TileColor" content="#da532c"> <meta name="msapplication-TileColor" content="#da532c">
<meta name="msapplication-config" content="favicons/browserconfig.xml"> <meta name="msapplication-config" content="favicons/browserconfig.xml">
<meta name="theme-color" content="#ffffff"> <meta name="theme-color" content="#ffffff">
<script>
const csrftoken="{{ csrf_token() }}";
</script>
<script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script> <script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
<script src="{{url_for('static_content', group='js', filename='csrf.js')}}" defer></script>
</head> </head>
<body> <body>

View File

@@ -292,7 +292,7 @@ xpath://body/div/span[contains(@class, 'example-class')]",
<ul> <ul>
<li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required, <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li> <li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required, <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li>
{% if jq_support %} {% if jq_support %}
<li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>.</li> <li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>. Prefix <code>jqraw:</code> outputs the results as text instead of a JSON list.</li>
{% else %} {% else %}
<li>jq support not installed</li> <li>jq support not installed</li>
{% endif %} {% endif %}
@@ -432,9 +432,8 @@ Unavailable") }}
<fieldset> <fieldset>
<div class="pure-control-group"> <div class="pure-control-group">
{% if visualselector_enabled %} {% if visualselector_enabled %}
<span class="pure-form-message-inline"> <span class="pure-form-message-inline" id="visual-selector-heading">
The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection &dash; after the <i>Browser Steps</i> has completed.<br> The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection. It automatically fills-in the filters in the "CSS/JSONPath/JQ/XPath Filters" box of the <a href="#filters-and-triggers">Filters & Triggers</a> tab. Use <strong>Shift+Click</strong> to select multiple items.
This tool is a helper to manage filters in the "CSS/JSONPath/JQ/XPath Filters" box of the <a href="#filters-and-triggers">Filters & Triggers</a> tab.
</span> </span>
<div id="selector-header"> <div id="selector-header">

View File

@@ -68,11 +68,11 @@
{% set link_order = "desc" if sort_order == 'asc' else "asc" %} {% set link_order = "desc" if sort_order == 'asc' else "asc" %}
{% set arrow_span = "" %} {% set arrow_span = "" %}
<th><input style="vertical-align: middle" type="checkbox" id="check-all" > <a class="{{ 'active '+link_order if sort_attribute == 'date_created' else 'inactive' }}" href="{{url_for('index', sort='date_created', order=link_order, tag=active_tag_uuid)}}"># <span class='arrow {{link_order}}'></span></a></th> <th><input style="vertical-align: middle" type="checkbox" id="check-all" > <a class="{{ 'active '+link_order if sort_attribute == 'date_created' else 'inactive' }}" href="{{url_for('index', sort='date_created', order=link_order, tag=active_tag_uuid)}}"># <span class='arrow {{link_order}}'></span></a></th>
<th></th> <th class="empty-cell"></th>
<th><a class="{{ 'active '+link_order if sort_attribute == 'label' else 'inactive' }}" href="{{url_for('index', sort='label', order=link_order, tag=active_tag_uuid)}}">Website <span class='arrow {{link_order}}'></span></a></th> <th><a class="{{ 'active '+link_order if sort_attribute == 'label' else 'inactive' }}" href="{{url_for('index', sort='label', order=link_order, tag=active_tag_uuid)}}">Website <span class='arrow {{link_order}}'></span></a></th>
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_checked' else 'inactive' }}" href="{{url_for('index', sort='last_checked', order=link_order, tag=active_tag_uuid)}}">Last Checked <span class='arrow {{link_order}}'></span></a></th> <th><a class="{{ 'active '+link_order if sort_attribute == 'last_checked' else 'inactive' }}" href="{{url_for('index', sort='last_checked', order=link_order, tag=active_tag_uuid)}}">Last Checked <span class='arrow {{link_order}}'></span></a></th>
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_changed' else 'inactive' }}" href="{{url_for('index', sort='last_changed', order=link_order, tag=active_tag_uuid)}}">Last Changed <span class='arrow {{link_order}}'></span></a></th> <th><a class="{{ 'active '+link_order if sort_attribute == 'last_changed' else 'inactive' }}" href="{{url_for('index', sort='last_changed', order=link_order, tag=active_tag_uuid)}}">Last Changed <span class='arrow {{link_order}}'></span></a></th>
<th></th> <th class="empty-cell"></th>
</tr> </tr>
</thead> </thead>
<tbody> <tbody>

View File

@@ -1,4 +1,7 @@
#!/usr/bin/python3 #!/usr/bin/python3
import resource
import time
from threading import Thread
import pytest import pytest
from changedetectionio import changedetection_app from changedetectionio import changedetection_app
@@ -23,6 +26,36 @@ def reportlog(pytestconfig):
yield yield
logger.remove(handler_id) logger.remove(handler_id)
def track_memory(memory_usage):
    """Poll this process's ``ru_maxrss`` until ``memory_usage["stop"]`` becomes truthy.

    :param memory_usage: mutable dict shared with the caller; ``"stop"`` is read
        on every iteration and ``"peak"`` is raised to the largest ``ru_maxrss``
        value observed so far (in whatever unit the OS reports).
    """
    while True:
        if memory_usage["stop"]:
            break
        current_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        if current_rss > memory_usage["peak"]:
            memory_usage["peak"] = current_rss
        # Short pause between samples; adjust as needed
        time.sleep(0.01)
@pytest.fixture(scope='function')
def measure_memory_usage(request):
    """Track peak memory while a test runs, log it, and fail the test if it exceeds the limit.

    A background thread runs ``track_memory`` for the duration of the test. On
    teardown the observed peak is logged, appended to ``test-memory.log`` and
    asserted to be below ``memory_limit_mb``.
    """
    # Single source of truth for the limit so the assertion and its message cannot drift apart
    memory_limit_mb = 150

    memory_usage = {"peak": 0, "stop": False}
    tracker_thread = Thread(target=track_memory, args=(memory_usage,))
    tracker_thread.start()

    yield

    # Signal the sampler to finish and wait for it before reading the result
    memory_usage["stop"] = True
    tracker_thread.join()

    # Note: ru_maxrss is in kilobytes on Linux
    # NOTE(review): macOS reports ru_maxrss in bytes - confirm if these tests are ever run there
    max_memory_used = memory_usage["peak"] / 1024  # Convert to MB
    s = f"Peak memory used by the test {request.node.fspath} - '{request.node.name}': {max_memory_used:.2f} MB"
    logger.debug(s)

    with open("test-memory.log", 'a') as f:
        f.write(f"{s}\n")

    # Assert that the memory usage is below the limit
    assert max_memory_used < memory_limit_mb, f"Memory usage exceeded {memory_limit_mb}MB: {max_memory_used:.2f} MB"
def cleanup(datastore_path): def cleanup(datastore_path):
import glob import glob
# Unlink test output files # Unlink test output files

View File

@@ -77,13 +77,13 @@ def do_test(client, live_server, make_test_use_extra_browser=False):
# Requires playwright to be installed # Requires playwright to be installed
def test_request_via_custom_browser_url(client, live_server): def test_request_via_custom_browser_url(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)
# We do this so we can grep the logs of the custom container and see if the request actually went through that container # We do this so we can grep the logs of the custom container and see if the request actually went through that container
do_test(client, live_server, make_test_use_extra_browser=True) do_test(client, live_server, make_test_use_extra_browser=True)
def test_request_not_via_custom_browser_url(client, live_server): def test_request_not_via_custom_browser_url(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)
# We do this so we can grep the logs of the custom container and see if the request actually went through that container # We do this so we can grep the logs of the custom container and see if the request actually went through that container
do_test(client, live_server, make_test_use_extra_browser=False) do_test(client, live_server, make_test_use_extra_browser=False)

View File

@@ -6,7 +6,7 @@ from ..util import live_server_setup, wait_for_all_checks
import logging import logging
# Requires playwright to be installed # Requires playwright to be installed
def test_fetch_webdriver_content(client, live_server): def test_fetch_webdriver_content(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)
##################### #####################

View File

@@ -3,7 +3,7 @@ from flask import url_for
from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
def test_execute_custom_js(client, live_server): def test_execute_custom_js(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)
assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test" assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test"

View File

@@ -5,7 +5,7 @@ from flask import url_for
from ..util import live_server_setup, wait_for_all_checks from ..util import live_server_setup, wait_for_all_checks
def test_preferred_proxy(client, live_server): def test_preferred_proxy(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)
url = "http://chosen.changedetection.io" url = "http://chosen.changedetection.io"

View File

@@ -5,7 +5,7 @@ from flask import url_for
from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
def test_noproxy_option(client, live_server): def test_noproxy_option(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)
# Run by run_proxy_tests.sh # Run by run_proxy_tests.sh
# Call this URL then scan the containers that it never went through them # Call this URL then scan the containers that it never went through them

View File

@@ -5,7 +5,7 @@ from flask import url_for
from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
# just make a request, we will grep in the docker logs to see it actually got called # just make a request, we will grep in the docker logs to see it actually got called
def test_check_basic_change_detection_functionality(client, live_server): def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)
res = client.post( res = client.post(
url_for("import_page"), url_for("import_page"),

View File

@@ -6,7 +6,7 @@ from ..util import live_server_setup, wait_for_all_checks
import os import os
# just make a request, we will grep in the docker logs to see it actually got called # just make a request, we will grep in the docker logs to see it actually got called
def test_select_custom(client, live_server): def test_select_custom(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)
# Goto settings, add our custom one # Goto settings, add our custom one

View File

@@ -5,7 +5,7 @@ from flask import url_for
from changedetectionio.tests.util import live_server_setup, wait_for_all_checks from changedetectionio.tests.util import live_server_setup, wait_for_all_checks
def test_socks5(client, live_server): def test_socks5(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)
# Setup a proxy # Setup a proxy

View File

@@ -7,7 +7,7 @@ from changedetectionio.tests.util import live_server_setup, wait_for_all_checks
# should be proxies.json mounted from run_proxy_tests.sh already # should be proxies.json mounted from run_proxy_tests.sh already
# -v `pwd`/tests/proxy_socks5/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json # -v `pwd`/tests/proxy_socks5/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json
def test_socks5_from_proxiesjson_file(client, live_server): def test_socks5_from_proxiesjson_file(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)
test_url = "https://changedetection.io/CHANGELOG.txt?socks-test-tag=" + os.getenv('SOCKSTEST', '') test_url = "https://changedetection.io/CHANGELOG.txt?socks-test-tag=" + os.getenv('SOCKSTEST', '')

View File

@@ -48,7 +48,7 @@ def set_back_in_stock_response():
return None return None
# Add a site in paused mode, add an invalid filter, we should still have visual selector data ready # Add a site in paused mode, add an invalid filter, we should still have visual selector data ready
def test_restock_detection(client, live_server): def test_restock_detection(client, live_server, measure_memory_usage):
set_original_response() set_original_response()
#assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test" #assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test"

View File

@@ -40,7 +40,7 @@ def get_last_message_from_smtp_server():
# Requires running the test SMTP server # Requires running the test SMTP server
def test_check_notification_email_formats_default_HTML(client, live_server): def test_check_notification_email_formats_default_HTML(client, live_server, measure_memory_usage):
# live_server_setup(live_server) # live_server_setup(live_server)
set_original_response() set_original_response()
@@ -92,7 +92,7 @@ def test_check_notification_email_formats_default_HTML(client, live_server):
assert b'Deleted' in res.data assert b'Deleted' in res.data
def test_check_notification_email_formats_default_Text_override_HTML(client, live_server): def test_check_notification_email_formats_default_Text_override_HTML(client, live_server, measure_memory_usage):
# live_server_setup(live_server) # live_server_setup(live_server)
# HTML problems? see this # HTML problems? see this

View File

@@ -35,10 +35,10 @@ def set_original(excluding=None, add_line=None):
with open("test-datastore/endpoint-content.txt", "w") as f: with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data) f.write(test_return_data)
def test_setup(client, live_server): def test_setup(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)
def test_check_removed_line_contains_trigger(client, live_server): def test_check_removed_line_contains_trigger(client, live_server, measure_memory_usage):
# Give the endpoint time to spin up # Give the endpoint time to spin up
time.sleep(1) time.sleep(1)
@@ -103,7 +103,7 @@ def test_check_removed_line_contains_trigger(client, live_server):
assert b'Deleted' in res.data assert b'Deleted' in res.data
def test_check_add_line_contains_trigger(client, live_server): def test_check_add_line_contains_trigger(client, live_server, measure_memory_usage):
#live_server_setup(live_server) #live_server_setup(live_server)
# Give the endpoint time to spin up # Give the endpoint time to spin up

View File

@@ -53,10 +53,10 @@ def is_valid_uuid(val):
return False return False
def test_setup(client, live_server): def test_setup(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)
def test_api_simple(client, live_server): def test_api_simple(client, live_server, measure_memory_usage):
#live_server_setup(live_server) #live_server_setup(live_server)
api_key = extract_api_key_from_UI(client) api_key = extract_api_key_from_UI(client)
@@ -149,6 +149,15 @@ def test_api_simple(client, live_server):
headers={'x-api-key': api_key}, headers={'x-api-key': api_key},
) )
assert b'which has this one new line' in res.data assert b'which has this one new line' in res.data
assert b'<div id' not in res.data
# Fetch the HTML of the latest one
res = client.get(
url_for("watchsinglehistory", uuid=watch_uuid, timestamp='latest')+"?html=1",
headers={'x-api-key': api_key},
)
assert b'which has this one new line' in res.data
assert b'<div id' in res.data
# Fetch the whole watch # Fetch the whole watch
res = client.get( res = client.get(
@@ -232,7 +241,7 @@ def test_api_simple(client, live_server):
) )
assert len(res.json) == 0, "Watch list should be empty" assert len(res.json) == 0, "Watch list should be empty"
def test_access_denied(client, live_server): def test_access_denied(client, live_server, measure_memory_usage):
# `config_api_token_enabled` Should be On by default # `config_api_token_enabled` Should be On by default
res = client.get( res = client.get(
url_for("createwatch") url_for("createwatch")
@@ -278,7 +287,7 @@ def test_access_denied(client, live_server):
) )
assert b"Settings updated." in res.data assert b"Settings updated." in res.data
def test_api_watch_PUT_update(client, live_server): def test_api_watch_PUT_update(client, live_server, measure_memory_usage):
#live_server_setup(live_server) #live_server_setup(live_server)
api_key = extract_api_key_from_UI(client) api_key = extract_api_key_from_UI(client)
@@ -360,7 +369,7 @@ def test_api_watch_PUT_update(client, live_server):
assert b'Deleted' in res.data assert b'Deleted' in res.data
def test_api_import(client, live_server): def test_api_import(client, live_server, measure_memory_usage):
api_key = extract_api_key_from_UI(client) api_key = extract_api_key_from_UI(client)
res = client.post( res = client.post(

View File

@@ -2,13 +2,12 @@
import time import time
from flask import url_for from flask import url_for
from . util import live_server_setup from .util import live_server_setup, wait_for_all_checks
def test_basic_auth(client, live_server):
def test_basic_auth(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page # Add our URL to the import page
test_url = url_for('test_basicauth_method', _external=True).replace("//","//myuser:mypass@") test_url = url_for('test_basicauth_method', _external=True).replace("//","//myuser:mypass@")
@@ -19,8 +18,8 @@ def test_basic_auth(client, live_server):
follow_redirects=True follow_redirects=True
) )
assert b"1 Imported" in res.data assert b"1 Imported" in res.data
wait_for_all_checks(client)
time.sleep(1) time.sleep(1)
# Check form validation # Check form validation
res = client.post( res = client.post(
url_for("edit_page", uuid="first"), url_for("edit_page", uuid="first"),
@@ -29,7 +28,7 @@ def test_basic_auth(client, live_server):
) )
assert b"Updated watch." in res.data assert b"Updated watch." in res.data
time.sleep(1) wait_for_all_checks(client)
res = client.get( res = client.get(
url_for("preview_page", uuid="first"), url_for("preview_page", uuid="first"),
follow_redirects=True follow_redirects=True

View File

@@ -76,11 +76,11 @@ def set_response_without_ldjson():
f.write(test_return_data) f.write(test_return_data)
return None return None
def test_setup(client, live_server): def test_setup(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)
# actually only really used by the distll.io importer, but could be handy too # actually only really used by the distll.io importer, but could be handy too
def test_check_ldjson_price_autodetect(client, live_server): def test_check_ldjson_price_autodetect(client, live_server, measure_memory_usage):
set_response_with_ldjson() set_response_with_ldjson()
@@ -100,7 +100,7 @@ def test_check_ldjson_price_autodetect(client, live_server):
# Accept it # Accept it
uuid = extract_UUID_from_client(client) uuid = extract_UUID_from_client(client)
time.sleep(1)
client.get(url_for('price_data_follower.accept', uuid=uuid, follow_redirects=True)) client.get(url_for('price_data_follower.accept', uuid=uuid, follow_redirects=True))
wait_for_all_checks(client) wait_for_all_checks(client)
@@ -167,7 +167,7 @@ def _test_runner_check_bad_format_ignored(live_server, client, has_ldjson_price_
client.get(url_for("form_delete", uuid="all"), follow_redirects=True) client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
def test_bad_ldjson_is_correctly_ignored(client, live_server): def test_bad_ldjson_is_correctly_ignored(client, live_server, measure_memory_usage):
#live_server_setup(live_server) #live_server_setup(live_server)
test_return_data = """ test_return_data = """
<html> <html>

View File

@@ -3,7 +3,8 @@
import time import time
from flask import url_for from flask import url_for
from urllib.request import urlopen from urllib.request import urlopen
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_rss_token_from_UI from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, \
extract_UUID_from_client
sleep_time_for_fetch_thread = 3 sleep_time_for_fetch_thread = 3
@@ -16,7 +17,7 @@ def test_inscriptus():
assert stripped_text_from_html == 'test!\nok man' assert stripped_text_from_html == 'test!\nok man'
def test_check_basic_change_detection_functionality(client, live_server): def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage):
set_original_response() set_original_response()
live_server_setup(live_server) live_server_setup(live_server)
@@ -62,9 +63,6 @@ def test_check_basic_change_detection_functionality(client, live_server):
# Make a change # Make a change
set_modified_response() set_modified_response()
res = urlopen(url_for('test_endpoint', _external=True))
assert b'which has this one new line' in res.read()
# Force recheck # Force recheck
res = client.get(url_for("form_watch_checknow"), follow_redirects=True) res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
assert b'1 watches queued for rechecking.' in res.data assert b'1 watches queued for rechecking.' in res.data
@@ -144,6 +142,14 @@ def test_check_basic_change_detection_functionality(client, live_server):
assert b'Mark all viewed' not in res.data assert b'Mark all viewed' not in res.data
assert b'unviewed' not in res.data assert b'unviewed' not in res.data
# #2458 "clear history" should make the Watch object update its status correctly when the first snapshot lands again
uuid = extract_UUID_from_client(client)
client.get(url_for("clear_watch_history", uuid=uuid))
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("index"))
assert b'preview/' in res.data
# #
# Cleanup everything # Cleanup everything
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)

View File

@@ -8,7 +8,7 @@ import re
import time import time
def test_backup(client, live_server): def test_backup(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)
set_original_response() set_original_response()

View File

@@ -60,7 +60,7 @@ def set_modified_response_minus_block_text():
f.write(test_return_data) f.write(test_return_data)
def test_check_block_changedetection_text_NOT_present(client, live_server): def test_check_block_changedetection_text_NOT_present(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)
# Use a mix of case in ZzZ to prove it works case-insensitive. # Use a mix of case in ZzZ to prove it works case-insensitive.

View File

@@ -6,7 +6,7 @@ from . util import live_server_setup
def test_trigger_functionality(client, live_server): def test_trigger_functionality(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)

View File

@@ -70,7 +70,7 @@ def test_include_filters_output():
# Tests the whole stack works with the CSS Filter # Tests the whole stack works with the CSS Filter
def test_check_markup_include_filters_restriction(client, live_server): def test_check_markup_include_filters_restriction(client, live_server, measure_memory_usage):
sleep_time_for_fetch_thread = 3 sleep_time_for_fetch_thread = 3
include_filters = "#sametext" include_filters = "#sametext"
@@ -124,7 +124,7 @@ def test_check_markup_include_filters_restriction(client, live_server):
# Tests the whole stack works with the CSS Filter # Tests the whole stack works with the CSS Filter
def test_check_multiple_filters(client, live_server): def test_check_multiple_filters(client, live_server, measure_memory_usage):
sleep_time_for_fetch_thread = 3 sleep_time_for_fetch_thread = 3
include_filters = "#blob-a\r\nxpath://*[contains(@id,'blob-b')]" include_filters = "#blob-a\r\nxpath://*[contains(@id,'blob-b')]"
@@ -180,7 +180,7 @@ def test_check_multiple_filters(client, live_server):
# The filter exists, but did not contain anything useful # The filter exists, but did not contain anything useful
# Mainly used when the filter contains just an IMG, this can happen when someone selects an image in the visual-selector # Mainly used when the filter contains just an IMG, this can happen when someone selects an image in the visual-selector
# Tests fetcher can throw a "ReplyWithContentButNoText" exception after applying filter and extracting text # Tests fetcher can throw a "ReplyWithContentButNoText" exception after applying filter and extracting text
def test_filter_is_empty_help_suggestion(client, live_server): def test_filter_is_empty_help_suggestion(client, live_server, measure_memory_usage):
#live_server_setup(live_server) #live_server_setup(live_server)
include_filters = "#blob-a" include_filters = "#blob-a"

View File

@@ -106,7 +106,7 @@ across multiple lines
) )
def test_element_removal_full(client, live_server): def test_element_removal_full(client, live_server, measure_memory_usage):
sleep_time_for_fetch_thread = 3 sleep_time_for_fetch_thread = 3
set_original_response() set_original_response()

View File

@@ -3,7 +3,7 @@
import time import time
from flask import url_for from flask import url_for
from .util import live_server_setup from .util import live_server_setup, wait_for_all_checks
import pytest import pytest
@@ -24,12 +24,9 @@ def set_html_response():
# In the case the server does not issue a charset= or doesnt have content_type header set # In the case the server does not issue a charset= or doesnt have content_type header set
def test_check_encoding_detection(client, live_server): def test_check_encoding_detection(client, live_server, measure_memory_usage):
set_html_response() set_html_response()
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page # Add our URL to the import page
test_url = url_for('test_endpoint', content_type="text/html", _external=True) test_url = url_for('test_endpoint', content_type="text/html", _external=True)
client.post( client.post(
@@ -39,7 +36,7 @@ def test_check_encoding_detection(client, live_server):
) )
# Give the thread time to pick it up # Give the thread time to pick it up
time.sleep(2) wait_for_all_checks(client)
res = client.get( res = client.get(
url_for("preview_page", uuid="first"), url_for("preview_page", uuid="first"),
@@ -53,12 +50,9 @@ def test_check_encoding_detection(client, live_server):
# In the case the server does not issue a charset= or doesnt have content_type header set # In the case the server does not issue a charset= or doesnt have content_type header set
def test_check_encoding_detection_missing_content_type_header(client, live_server): def test_check_encoding_detection_missing_content_type_header(client, live_server, measure_memory_usage):
set_html_response() set_html_response()
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page # Add our URL to the import page
test_url = url_for('test_endpoint', _external=True) test_url = url_for('test_endpoint', _external=True)
client.post( client.post(
@@ -67,8 +61,7 @@ def test_check_encoding_detection_missing_content_type_header(client, live_serve
follow_redirects=True follow_redirects=True
) )
# Give the thread time to pick it up wait_for_all_checks(client)
time.sleep(2)
res = client.get( res = client.get(
url_for("preview_page", uuid="first"), url_for("preview_page", uuid="first"),

View File

@@ -54,7 +54,7 @@ def _runner_test_http_errors(client, live_server, http_code, expected_text):
assert b'Deleted' in res.data assert b'Deleted' in res.data
def test_http_error_handler(client, live_server): def test_http_error_handler(client, live_server, measure_memory_usage):
_runner_test_http_errors(client, live_server, 403, 'Access denied') _runner_test_http_errors(client, live_server, 403, 'Access denied')
_runner_test_http_errors(client, live_server, 404, 'Page not found') _runner_test_http_errors(client, live_server, 404, 'Page not found')
_runner_test_http_errors(client, live_server, 500, '(Internal server error) received') _runner_test_http_errors(client, live_server, 500, '(Internal server error) received')
@@ -63,7 +63,7 @@ def test_http_error_handler(client, live_server):
assert b'Deleted' in res.data assert b'Deleted' in res.data
# Just to be sure error text is properly handled # Just to be sure error text is properly handled
def test_DNS_errors(client, live_server): def test_DNS_errors(client, live_server, measure_memory_usage):
# Give the endpoint time to spin up # Give the endpoint time to spin up
time.sleep(1) time.sleep(1)
@@ -87,7 +87,7 @@ def test_DNS_errors(client, live_server):
assert b'Deleted' in res.data assert b'Deleted' in res.data
# Re 1513 # Re 1513
def test_low_level_errors_clear_correctly(client, live_server): def test_low_level_errors_clear_correctly(client, live_server, measure_memory_usage):
#live_server_setup(live_server) #live_server_setup(live_server)
# Give the endpoint time to spin up # Give the endpoint time to spin up
time.sleep(1) time.sleep(1)

View File

@@ -9,7 +9,7 @@ sleep_time_for_fetch_thread = 3
def test_check_extract_text_from_diff(client, live_server): def test_check_extract_text_from_diff(client, live_server, measure_memory_usage):
import time import time
with open("test-datastore/endpoint-content.txt", "w") as f: with open("test-datastore/endpoint-content.txt", "w") as f:
f.write("Now it's {} seconds since epoch, time flies!".format(str(time.time()))) f.write("Now it's {} seconds since epoch, time flies!".format(str(time.time())))
@@ -29,6 +29,7 @@ def test_check_extract_text_from_diff(client, live_server):
# Load in 5 different numbers/changes # Load in 5 different numbers/changes
last_date="" last_date=""
for n in range(5): for n in range(5):
time.sleep(1)
# Give the thread time to pick it up # Give the thread time to pick it up
print("Bumping snapshot and checking.. ", n) print("Bumping snapshot and checking.. ", n)
last_date = str(time.time()) last_date = str(time.time())

View File

@@ -67,10 +67,10 @@ def set_multiline_response():
return None return None
def test_setup(client, live_server): def test_setup(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)
def test_check_filter_multiline(client, live_server): def test_check_filter_multiline(client, live_server, measure_memory_usage):
#live_server_setup(live_server) #live_server_setup(live_server)
set_multiline_response() set_multiline_response()
@@ -122,7 +122,7 @@ def test_check_filter_multiline(client, live_server):
# but the last one, which also says 'lines' shouldnt be here (non-greedy match checking) # but the last one, which also says 'lines' shouldnt be here (non-greedy match checking)
assert b'aaand something lines' not in res.data assert b'aaand something lines' not in res.data
def test_check_filter_and_regex_extract(client, live_server): def test_check_filter_and_regex_extract(client, live_server, measure_memory_usage):
include_filters = ".changetext" include_filters = ".changetext"
@@ -205,7 +205,7 @@ def test_check_filter_and_regex_extract(client, live_server):
def test_regex_error_handling(client, live_server): def test_regex_error_handling(client, live_server, measure_memory_usage):
#live_server_setup(live_server) #live_server_setup(live_server)

View File

@@ -41,7 +41,7 @@ def set_response_with_filter():
f.write(test_return_data) f.write(test_return_data)
return None return None
def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_server): def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_server, measure_memory_usage):
# Filter knowingly doesn't exist, like someone setting up a known filter to see if some cinema tickets are on sale again # Filter knowingly doesn't exist, like someone setting up a known filter to see if some cinema tickets are on sale again
# And the page has that filter available # And the page has that filter available
# Then I should get a notification # Then I should get a notification

View File

@@ -21,10 +21,11 @@ def set_response_with_filter():
f.write(test_return_data) f.write(test_return_data)
return None return None
def run_filter_test(client, content_filter): def run_filter_test(client, live_server, content_filter):
# Response WITHOUT the filter ID element
set_original_response()
# Give the endpoint time to spin up
time.sleep(1)
# cleanup for the next # cleanup for the next
client.get( client.get(
url_for("form_delete", uuid="all"), url_for("form_delete", uuid="all"),
@@ -79,6 +80,7 @@ def run_filter_test(client, content_filter):
"include_filters": content_filter, "include_filters": content_filter,
"fetch_backend": "html_requests"}) "fetch_backend": "html_requests"})
# A POST here will also reset the filter failure counter (filter_failure_notification_threshold_attempts)
res = client.post( res = client.post(
url_for("edit_page", uuid="first"), url_for("edit_page", uuid="first"),
data=notification_form_data, data=notification_form_data,
@@ -91,20 +93,21 @@ def run_filter_test(client, content_filter):
# Now the notification should not exist, because we didnt reach the threshold # Now the notification should not exist, because we didnt reach the threshold
assert not os.path.isfile("test-datastore/notification.txt") assert not os.path.isfile("test-datastore/notification.txt")
# -2 because we would have checked twice above (on adding and on edit) # recheck it up to just before the threshold, including the fact that in the previous POST it would have rechecked (and incremented)
for i in range(0, App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT-2): for i in range(0, App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT-2):
res = client.get(url_for("form_watch_checknow"), follow_redirects=True) client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client) wait_for_all_checks(client)
assert not os.path.isfile("test-datastore/notification.txt"), f"test-datastore/notification.txt should not exist - Attempt {i}" time.sleep(2) # delay for apprise to fire
assert not os.path.isfile("test-datastore/notification.txt"), f"test-datastore/notification.txt should not exist - Attempt {i} when threshold is {App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT}"
# We should see something in the frontend # We should see something in the frontend
res = client.get(url_for("index"))
assert b'Warning, no filters were found' in res.data assert b'Warning, no filters were found' in res.data
# One more check should trigger it (see -2 above) # One more check should trigger the _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT threshold
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
client.get(url_for("form_watch_checknow"), follow_redirects=True) client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client) wait_for_all_checks(client)
time.sleep(2) # delay for apprise to fire
# Now it should exist and contain our "filter not found" alert # Now it should exist and contain our "filter not found" alert
assert os.path.isfile("test-datastore/notification.txt") assert os.path.isfile("test-datastore/notification.txt")
@@ -148,14 +151,10 @@ def run_filter_test(client, content_filter):
def test_setup(live_server): def test_setup(live_server):
live_server_setup(live_server) live_server_setup(live_server)
def test_check_include_filters_failure_notification(client, live_server): def test_check_include_filters_failure_notification(client, live_server, measure_memory_usage):
set_original_response() run_filter_test(client, live_server,'#nope-doesnt-exist')
wait_for_all_checks(client)
run_filter_test(client, '#nope-doesnt-exist')
def test_check_xpath_filter_failure_notification(client, live_server): def test_check_xpath_filter_failure_notification(client, live_server, measure_memory_usage):
set_original_response() run_filter_test(client, live_server, '//*[@id="nope-doesnt-exist"]')
time.sleep(1)
run_filter_test(client, '//*[@id="nope-doesnt-exist"]')
# Test that notification is never sent # Test that notification is never sent

View File

@@ -6,7 +6,7 @@ from .util import live_server_setup, wait_for_all_checks, extract_rss_token_from
import os import os
def test_setup(client, live_server): def test_setup(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)
def set_original_response(): def set_original_response():
@@ -39,7 +39,7 @@ def set_modified_response():
f.write(test_return_data) f.write(test_return_data)
return None return None
def test_setup_group_tag(client, live_server): def test_setup_group_tag(client, live_server, measure_memory_usage):
#live_server_setup(live_server) #live_server_setup(live_server)
set_original_response() set_original_response()
@@ -130,7 +130,7 @@ def test_setup_group_tag(client, live_server):
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data assert b'Deleted' in res.data
def test_tag_import_singular(client, live_server): def test_tag_import_singular(client, live_server, measure_memory_usage):
#live_server_setup(live_server) #live_server_setup(live_server)
test_url = url_for('test_endpoint', _external=True) test_url = url_for('test_endpoint', _external=True)
@@ -150,7 +150,7 @@ def test_tag_import_singular(client, live_server):
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data assert b'Deleted' in res.data
def test_tag_add_in_ui(client, live_server): def test_tag_add_in_ui(client, live_server, measure_memory_usage):
#live_server_setup(live_server) #live_server_setup(live_server)
# #
res = client.post( res = client.post(
@@ -167,7 +167,7 @@ def test_tag_add_in_ui(client, live_server):
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data assert b'Deleted' in res.data
def test_group_tag_notification(client, live_server): def test_group_tag_notification(client, live_server, measure_memory_usage):
#live_server_setup(live_server) #live_server_setup(live_server)
set_original_response() set_original_response()
@@ -235,7 +235,7 @@ def test_group_tag_notification(client, live_server):
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data assert b'Deleted' in res.data
def test_limit_tag_ui(client, live_server): def test_limit_tag_ui(client, live_server, measure_memory_usage):
#live_server_setup(live_server) #live_server_setup(live_server)
test_url = url_for('test_endpoint', _external=True) test_url = url_for('test_endpoint', _external=True)
@@ -273,7 +273,7 @@ def test_limit_tag_ui(client, live_server):
assert b'Deleted' in res.data assert b'Deleted' in res.data
res = client.get(url_for("tags.delete_all"), follow_redirects=True) res = client.get(url_for("tags.delete_all"), follow_redirects=True)
assert b'All tags deleted' in res.data assert b'All tags deleted' in res.data
def test_clone_tag_on_import(client, live_server): def test_clone_tag_on_import(client, live_server, measure_memory_usage):
#live_server_setup(live_server) #live_server_setup(live_server)
test_url = url_for('test_endpoint', _external=True) test_url = url_for('test_endpoint', _external=True)
res = client.post( res = client.post(
@@ -298,7 +298,7 @@ def test_clone_tag_on_import(client, live_server):
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data assert b'Deleted' in res.data
def test_clone_tag_on_quickwatchform_add(client, live_server): def test_clone_tag_on_quickwatchform_add(client, live_server, measure_memory_usage):
#live_server_setup(live_server) #live_server_setup(live_server)
test_url = url_for('test_endpoint', _external=True) test_url = url_for('test_endpoint', _external=True)
@@ -328,7 +328,7 @@ def test_clone_tag_on_quickwatchform_add(client, live_server):
res = client.get(url_for("tags.delete_all"), follow_redirects=True) res = client.get(url_for("tags.delete_all"), follow_redirects=True)
assert b'All tags deleted' in res.data assert b'All tags deleted' in res.data
def test_order_of_filters_tag_filter_and_watch_filter(client, live_server): def test_order_of_filters_tag_filter_and_watch_filter(client, live_server, measure_memory_usage):
# Add a tag with some config, import a tag and it should roughly work # Add a tag with some config, import a tag and it should roughly work
res = client.post( res = client.post(

View File

@@ -5,15 +5,13 @@ import os
import json import json
import logging import logging
from flask import url_for from flask import url_for
from .util import live_server_setup from .util import live_server_setup, wait_for_all_checks
from urllib.parse import urlparse, parse_qs from urllib.parse import urlparse, parse_qs
def test_consistent_history(client, live_server): def test_consistent_history(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)
# Give the endpoint time to spin up r = range(1, 30)
time.sleep(1)
r = range(1, 50)
for one in r: for one in r:
test_url = url_for('test_endpoint', content_type="text/html", content=str(one), _external=True) test_url = url_for('test_endpoint', content_type="text/html", content=str(one), _external=True)
@@ -25,15 +23,8 @@ def test_consistent_history(client, live_server):
assert b"1 Imported" in res.data assert b"1 Imported" in res.data
time.sleep(3) wait_for_all_checks(client)
while True:
res = client.get(url_for("index"))
logging.debug("Waiting for 'Checking now' to go away..")
if b'Checking now' not in res.data:
break
time.sleep(0.5)
time.sleep(3)
# Essentially just triggers the DB write/update # Essentially just triggers the DB write/update
res = client.post( res = client.post(
url_for("settings_page"), url_for("settings_page"),
@@ -44,8 +35,9 @@ def test_consistent_history(client, live_server):
) )
assert b"Settings updated." in res.data assert b"Settings updated." in res.data
# Give it time to write it out
time.sleep(3) time.sleep(2)
json_db_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, 'url-watches.json') json_db_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, 'url-watches.json')
json_obj = None json_obj = None
@@ -58,7 +50,7 @@ def test_consistent_history(client, live_server):
# each one should have a history.txt containing just one line # each one should have a history.txt containing just one line
for w in json_obj['watching'].keys(): for w in json_obj['watching'].keys():
history_txt_index_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, w, 'history.txt') history_txt_index_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, w, 'history.txt')
assert os.path.isfile(history_txt_index_file), "History.txt should exist where I expect it - {}".format(history_txt_index_file) assert os.path.isfile(history_txt_index_file), f"History.txt should exist where I expect it at {history_txt_index_file}"
# Same like in model.Watch # Same like in model.Watch
with open(history_txt_index_file, "r") as f: with open(history_txt_index_file, "r") as f:
@@ -70,15 +62,15 @@ def test_consistent_history(client, live_server):
w)) w))
# Find the snapshot one # Find the snapshot one
for fname in files_in_watch_dir: for fname in files_in_watch_dir:
if fname != 'history.txt': if fname != 'history.txt' and 'html' not in fname:
# contents should match what we requested as content returned from the test url # contents should match what we requested as content returned from the test url
with open(os.path.join(live_server.app.config['DATASTORE'].datastore_path, w, fname), 'r') as snapshot_f: with open(os.path.join(live_server.app.config['DATASTORE'].datastore_path, w, fname), 'r') as snapshot_f:
contents = snapshot_f.read() contents = snapshot_f.read()
watch_url = json_obj['watching'][w]['url'] watch_url = json_obj['watching'][w]['url']
u = urlparse(watch_url) u = urlparse(watch_url)
q = parse_qs(u[4]) q = parse_qs(u[4])
assert q['content'][0] == contents.strip(), "Snapshot file {} should contain {}".format(fname, q['content'][0]) assert q['content'][0] == contents.strip(), f"Snapshot file {fname} should contain {q['content'][0]}"
assert len(files_in_watch_dir) == 2, "Should be just two files in the dir, history.txt and the snapshot" assert len(files_in_watch_dir) == 3, "Should be just three files in the dir, html.br snapshot, history.txt and the extracted text snapshot"

View File

@@ -82,7 +82,7 @@ def set_modified_ignore_response():
f.write(test_return_data) f.write(test_return_data)
def test_check_ignore_text_functionality(client, live_server): def test_check_ignore_text_functionality(client, live_server, measure_memory_usage):
# Use a mix of case in ZzZ to prove it works case-insensitive. # Use a mix of case in ZzZ to prove it works case-insensitive.
ignore_text = "XXXXX\r\nYYYYY\r\nzZzZZ\r\nnew ignore stuff" ignore_text = "XXXXX\r\nYYYYY\r\nzZzZZ\r\nnew ignore stuff"
@@ -164,7 +164,7 @@ def test_check_ignore_text_functionality(client, live_server):
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data assert b'Deleted' in res.data
def test_check_global_ignore_text_functionality(client, live_server): def test_check_global_ignore_text_functionality(client, live_server, measure_memory_usage):
# Give the endpoint time to spin up # Give the endpoint time to spin up
time.sleep(1) time.sleep(1)

View File

@@ -23,7 +23,7 @@ def set_original_ignore_response():
f.write(test_return_data) f.write(test_return_data)
def test_highlight_ignore(client, live_server): def test_highlight_ignore(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)
set_original_ignore_response() set_original_ignore_response()
test_url = url_for('test_endpoint', _external=True) test_url = url_for('test_endpoint', _external=True)
@@ -45,7 +45,6 @@ def test_highlight_ignore(client, live_server):
) )
res = client.get(url_for("edit_page", uuid=uuid)) res = client.get(url_for("edit_page", uuid=uuid))
# should be a regex now # should be a regex now
assert b'/oh\ yeah\ \d+/' in res.data assert b'/oh\ yeah\ \d+/' in res.data
@@ -55,3 +54,7 @@ def test_highlight_ignore(client, live_server):
# And it should register in the preview page # And it should register in the preview page
res = client.get(url_for("preview_page", uuid=uuid)) res = client.get(url_for("preview_page", uuid=uuid))
assert b'<div class="ignored">oh yeah 456' in res.data assert b'<div class="ignored">oh yeah 456' in res.data
# Should be in base.html
assert b'csrftoken' in res.data

View File

@@ -40,7 +40,7 @@ def set_modified_ignore_response():
with open("test-datastore/endpoint-content.txt", "w") as f: with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data) f.write(test_return_data)
def test_render_anchor_tag_content_true(client, live_server): def test_render_anchor_tag_content_true(client, live_server, measure_memory_usage):
"""Testing that the link changes are detected when """Testing that the link changes are detected when
render_anchor_tag_content setting is set to true""" render_anchor_tag_content setting is set to true"""
sleep_time_for_fetch_thread = 3 sleep_time_for_fetch_thread = 3

View File

@@ -39,7 +39,7 @@ def set_some_changed_response():
f.write(test_return_data) f.write(test_return_data)
def test_normal_page_check_works_with_ignore_status_code(client, live_server): def test_normal_page_check_works_with_ignore_status_code(client, live_server, measure_memory_usage):
# Give the endpoint time to spin up # Give the endpoint time to spin up
@@ -85,7 +85,7 @@ def test_normal_page_check_works_with_ignore_status_code(client, live_server):
# Tests the whole stack works with staus codes ignored # Tests the whole stack works with staus codes ignored
def test_403_page_check_works_with_ignore_status_code(client, live_server): def test_403_page_check_works_with_ignore_status_code(client, live_server, measure_memory_usage):
sleep_time_for_fetch_thread = 3 sleep_time_for_fetch_thread = 3
set_original_response() set_original_response()

View File

@@ -49,7 +49,7 @@ def set_original_ignore_response():
# If there was only a change in the whitespacing, then we shouldnt have a change detected # If there was only a change in the whitespacing, then we shouldnt have a change detected
def test_check_ignore_whitespace(client, live_server): def test_check_ignore_whitespace(client, live_server, measure_memory_usage):
sleep_time_for_fetch_thread = 3 sleep_time_for_fetch_thread = 3
# Give the endpoint time to spin up # Give the endpoint time to spin up

View File

@@ -8,10 +8,10 @@ from flask import url_for
from .util import live_server_setup, wait_for_all_checks from .util import live_server_setup, wait_for_all_checks
def test_setup(client, live_server): def test_setup(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)
def test_import(client, live_server): def test_import(client, live_server, measure_memory_usage):
# Give the endpoint time to spin up # Give the endpoint time to spin up
wait_for_all_checks(client) wait_for_all_checks(client)
@@ -34,7 +34,7 @@ https://example.com tag1, other tag"""
res = client.get( url_for("index")) res = client.get( url_for("index"))
res = client.get( url_for("index")) res = client.get( url_for("index"))
def xtest_import_skip_url(client, live_server): def xtest_import_skip_url(client, live_server, measure_memory_usage):
# Give the endpoint time to spin up # Give the endpoint time to spin up
@@ -57,7 +57,7 @@ def xtest_import_skip_url(client, live_server):
# Clear flask alerts # Clear flask alerts
res = client.get( url_for("index")) res = client.get( url_for("index"))
def test_import_distillio(client, live_server): def test_import_distillio(client, live_server, measure_memory_usage):
distill_data=''' distill_data='''
{ {
@@ -123,7 +123,7 @@ def test_import_distillio(client, live_server):
# Clear flask alerts # Clear flask alerts
res = client.get(url_for("index")) res = client.get(url_for("index"))
def test_import_custom_xlsx(client, live_server): def test_import_custom_xlsx(client, live_server, measure_memory_usage):
"""Test can upload a excel spreadsheet and the watches are created correctly""" """Test can upload a excel spreadsheet and the watches are created correctly"""
#live_server_setup(live_server) #live_server_setup(live_server)
@@ -172,7 +172,7 @@ def test_import_custom_xlsx(client, live_server):
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data assert b'Deleted' in res.data
def test_import_watchete_xlsx(client, live_server): def test_import_watchete_xlsx(client, live_server, measure_memory_usage):
"""Test can upload a excel spreadsheet and the watches are created correctly""" """Test can upload a excel spreadsheet and the watches are created correctly"""
#live_server_setup(live_server) #live_server_setup(live_server)

View File

@@ -5,11 +5,11 @@ from flask import url_for
from .util import live_server_setup, wait_for_all_checks from .util import live_server_setup, wait_for_all_checks
def test_setup(client, live_server): def test_setup(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)
# If there was only a change in the whitespacing, then we shouldnt have a change detected # If there was only a change in the whitespacing, then we shouldnt have a change detected
def test_jinja2_in_url_query(client, live_server): def test_jinja2_in_url_query(client, live_server, measure_memory_usage):
#live_server_setup(live_server) #live_server_setup(live_server)
# Add our URL to the import page # Add our URL to the import page
@@ -34,7 +34,7 @@ def test_jinja2_in_url_query(client, live_server):
assert b'date=2' in res.data assert b'date=2' in res.data
# https://techtonics.medium.com/secure-templating-with-jinja2-understanding-ssti-and-jinja2-sandbox-environment-b956edd60456 # https://techtonics.medium.com/secure-templating-with-jinja2-understanding-ssti-and-jinja2-sandbox-environment-b956edd60456
def test_jinja2_security_url_query(client, live_server): def test_jinja2_security_url_query(client, live_server, measure_memory_usage):
#live_server_setup(live_server) #live_server_setup(live_server)
# Add our URL to the import page # Add our URL to the import page

View File

@@ -41,19 +41,26 @@ and it can also be repeated
from .. import html_tools from .. import html_tools
# See that we can find the second <script> one, which is not broken, and matches our filter # See that we can find the second <script> one, which is not broken, and matches our filter
text = html_tools.extract_json_as_string(content, "json:$.offers.price") text = html_tools.extract_json_as_string(content, "json:$.offers.priceCurrency")
assert text == "23.5" assert text == '"AUD"'
text = html_tools.extract_json_as_string('{"id":5}', "json:$.id")
assert text == "5"
# also check for jq # also check for jq
if jq_support: if jq_support:
text = html_tools.extract_json_as_string(content, "jq:.offers.price") text = html_tools.extract_json_as_string(content, "jq:.offers.priceCurrency")
assert text == "23.5" assert text == '"AUD"'
text = html_tools.extract_json_as_string('{"id":5}', "jq:.id") text = html_tools.extract_json_as_string('{"id":5}', "jq:.id")
assert text == "5" assert text == "5"
text = html_tools.extract_json_as_string('{"id":5}', "json:$.id") text = html_tools.extract_json_as_string(content, "jqraw:.offers.priceCurrency")
assert text == "5" assert text == "AUD"
text = html_tools.extract_json_as_string('{"id":5}', "jqraw:.id")
assert text == "5"
# When nothing at all is found, it should throw JSONNOTFound # When nothing at all is found, it should throw JSONNOTFound
# Which is caught and shown to the user in the watch-overview table # Which is caught and shown to the user in the watch-overview table
@@ -64,6 +71,9 @@ and it can also be repeated
with pytest.raises(html_tools.JSONNotFound) as e_info: with pytest.raises(html_tools.JSONNotFound) as e_info:
html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "jq:.id") html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "jq:.id")
with pytest.raises(html_tools.JSONNotFound) as e_info:
html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "jqraw:.id")
def test_unittest_inline_extract_body(): def test_unittest_inline_extract_body():
content = """ content = """
@@ -191,7 +201,7 @@ def set_modified_response():
return None return None
def test_check_json_without_filter(client, live_server): def test_check_json_without_filter(client, live_server, measure_memory_usage):
# Request a JSON document from a application/json source containing HTML # Request a JSON document from a application/json source containing HTML
# and be sure it doesn't get chewed up by instriptis # and be sure it doesn't get chewed up by instriptis
set_json_response_with_html() set_json_response_with_html()
@@ -284,13 +294,17 @@ def check_json_filter(json_filter, client, live_server):
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data assert b'Deleted' in res.data
def test_check_jsonpath_filter(client, live_server): def test_check_jsonpath_filter(client, live_server, measure_memory_usage):
check_json_filter('json:boss.name', client, live_server) check_json_filter('json:boss.name', client, live_server)
def test_check_jq_filter(client, live_server): def test_check_jq_filter(client, live_server, measure_memory_usage):
if jq_support: if jq_support:
check_json_filter('jq:.boss.name', client, live_server) check_json_filter('jq:.boss.name', client, live_server)
def test_check_jqraw_filter(client, live_server, measure_memory_usage):
if jq_support:
check_json_filter('jqraw:.boss.name', client, live_server)
def check_json_filter_bool_val(json_filter, client, live_server): def check_json_filter_bool_val(json_filter, client, live_server):
set_original_response() set_original_response()
@@ -338,10 +352,14 @@ def check_json_filter_bool_val(json_filter, client, live_server):
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data assert b'Deleted' in res.data
def test_check_jsonpath_filter_bool_val(client, live_server): def test_check_jsonpath_filter_bool_val(client, live_server, measure_memory_usage):
check_json_filter_bool_val("json:$['available']", client, live_server) check_json_filter_bool_val("json:$['available']", client, live_server)
def test_check_jq_filter_bool_val(client, live_server): def test_check_jq_filter_bool_val(client, live_server, measure_memory_usage):
if jq_support:
check_json_filter_bool_val("jq:.available", client, live_server)
def test_check_jqraw_filter_bool_val(client, live_server, measure_memory_usage):
if jq_support: if jq_support:
check_json_filter_bool_val("jq:.available", client, live_server) check_json_filter_bool_val("jq:.available", client, live_server)
@@ -412,7 +430,7 @@ def check_json_ext_filter(json_filter, client, live_server):
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data assert b'Deleted' in res.data
def test_ignore_json_order(client, live_server): def test_ignore_json_order(client, live_server, measure_memory_usage):
# A change in order shouldn't trigger a notification # A change in order shouldn't trigger a notification
with open("test-datastore/endpoint-content.txt", "w") as f: with open("test-datastore/endpoint-content.txt", "w") as f:
@@ -454,7 +472,7 @@ def test_ignore_json_order(client, live_server):
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data assert b'Deleted' in res.data
def test_correct_header_detect(client, live_server): def test_correct_header_detect(client, live_server, measure_memory_usage):
# Like in https://github.com/dgtlmoon/changedetection.io/pull/1593 # Like in https://github.com/dgtlmoon/changedetection.io/pull/1593
# Specify extra html that JSON is sometimes wrapped in - when using SockpuppetBrowser / Puppeteer / Playwrightetc # Specify extra html that JSON is sometimes wrapped in - when using SockpuppetBrowser / Puppeteer / Playwrightetc
with open("test-datastore/endpoint-content.txt", "w") as f: with open("test-datastore/endpoint-content.txt", "w") as f:
@@ -486,9 +504,13 @@ def test_correct_header_detect(client, live_server):
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data assert b'Deleted' in res.data
def test_check_jsonpath_ext_filter(client, live_server): def test_check_jsonpath_ext_filter(client, live_server, measure_memory_usage):
check_json_ext_filter('json:$[?(@.status==Sold)]', client, live_server) check_json_ext_filter('json:$[?(@.status==Sold)]', client, live_server)
def test_check_jq_ext_filter(client, live_server): def test_check_jq_ext_filter(client, live_server, measure_memory_usage):
if jq_support: if jq_support:
check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server) check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server)
def test_check_jqraw_ext_filter(client, live_server, measure_memory_usage):
if jq_support:
check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server)

View File

@@ -22,7 +22,7 @@ def set_nonrenderable_response():
return None return None
def test_check_basic_change_detection_functionality(client, live_server): def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage):
set_original_response() set_original_response()
live_server_setup(live_server) live_server_setup(live_server)

View File

@@ -21,7 +21,7 @@ def test_setup(live_server):
# Hard to just add more live server URLs when one test is already running (I think) # Hard to just add more live server URLs when one test is already running (I think)
# So we add our test here (was in a different file) # So we add our test here (was in a different file)
def test_check_notification(client, live_server): def test_check_notification(client, live_server, measure_memory_usage):
#live_server_setup(live_server) #live_server_setup(live_server)
set_original_response() set_original_response()
@@ -234,7 +234,7 @@ def test_check_notification(client, live_server):
follow_redirects=True follow_redirects=True
) )
def test_notification_validation(client, live_server): def test_notification_validation(client, live_server, measure_memory_usage):
time.sleep(1) time.sleep(1)
@@ -273,7 +273,7 @@ def test_notification_validation(client, live_server):
def test_notification_custom_endpoint_and_jinja2(client, live_server): def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_memory_usage):
#live_server_setup(live_server) #live_server_setup(live_server)
# test_endpoint - that sends the contents of a file # test_endpoint - that sends the contents of a file

View File

@@ -4,7 +4,7 @@ from flask import url_for
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
import logging import logging
def test_check_notification_error_handling(client, live_server): def test_check_notification_error_handling(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)
set_original_response() set_original_response()

View File

@@ -18,7 +18,7 @@ def set_original_ignore_response():
f.write(test_return_data) f.write(test_return_data)
def test_obfuscations(client, live_server): def test_obfuscations(client, live_server, measure_memory_usage):
set_original_ignore_response() set_original_ignore_response()
live_server_setup(live_server) live_server_setup(live_server)
time.sleep(1) time.sleep(1)

View File

@@ -6,7 +6,7 @@ from .util import set_original_response, set_modified_response, live_server_setu
# `subtractive_selectors` should still work in `source:` type requests # `subtractive_selectors` should still work in `source:` type requests
def test_fetch_pdf(client, live_server): def test_fetch_pdf(client, live_server, measure_memory_usage):
import shutil import shutil
shutil.copy("tests/test.pdf", "test-datastore/endpoint-test.pdf") shutil.copy("tests/test.pdf", "test-datastore/endpoint-test.pdf")

View File

@@ -9,7 +9,7 @@ def test_setup(live_server):
# Hard to just add more live server URLs when one test is already running (I think) # Hard to just add more live server URLs when one test is already running (I think)
# So we add our test here (was in a different file) # So we add our test here (was in a different file)
def test_headers_in_request(client, live_server): def test_headers_in_request(client, live_server, measure_memory_usage):
#ve_server_setup(live_server) #ve_server_setup(live_server)
# Add our URL to the import page # Add our URL to the import page
test_url = url_for('test_headers', _external=True) test_url = url_for('test_headers', _external=True)
@@ -84,7 +84,7 @@ def test_headers_in_request(client, live_server):
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data assert b'Deleted' in res.data
def test_body_in_request(client, live_server): def test_body_in_request(client, live_server, measure_memory_usage):
# Add our URL to the import page # Add our URL to the import page
test_url = url_for('test_body', _external=True) test_url = url_for('test_body', _external=True)
@@ -177,7 +177,7 @@ def test_body_in_request(client, live_server):
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data assert b'Deleted' in res.data
def test_method_in_request(client, live_server): def test_method_in_request(client, live_server, measure_memory_usage):
# Add our URL to the import page # Add our URL to the import page
test_url = url_for('test_method', _external=True) test_url = url_for('test_method', _external=True)
if os.getenv('PLAYWRIGHT_DRIVER_URL'): if os.getenv('PLAYWRIGHT_DRIVER_URL'):
@@ -254,7 +254,7 @@ def test_method_in_request(client, live_server):
assert b'Deleted' in res.data assert b'Deleted' in res.data
# Re #2408 - user-agent override test, also should handle case-insensitive header deduplication # Re #2408 - user-agent override test, also should handle case-insensitive header deduplication
def test_ua_global_override(client, live_server): def test_ua_global_override(client, live_server, measure_memory_usage):
# live_server_setup(live_server) # live_server_setup(live_server)
test_url = url_for('test_headers', _external=True) test_url = url_for('test_headers', _external=True)
@@ -309,7 +309,7 @@ def test_ua_global_override(client, live_server):
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data assert b'Deleted' in res.data
def test_headers_textfile_in_request(client, live_server): def test_headers_textfile_in_request(client, live_server, measure_memory_usage):
#live_server_setup(live_server) #live_server_setup(live_server)
# Add our URL to the import page # Add our URL to the import page

View File

@@ -49,10 +49,10 @@ def set_original_cdata_xml():
f.write(test_return_data) f.write(test_return_data)
def test_setup(client, live_server): def test_setup(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)
def test_rss_and_token(client, live_server): def test_rss_and_token(client, live_server, measure_memory_usage):
# live_server_setup(live_server) # live_server_setup(live_server)
set_original_response() set_original_response()
@@ -69,6 +69,7 @@ def test_rss_and_token(client, live_server):
wait_for_all_checks(client) wait_for_all_checks(client)
set_modified_response() set_modified_response()
time.sleep(1)
client.get(url_for("form_watch_checknow"), follow_redirects=True) client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client) wait_for_all_checks(client)
@@ -87,9 +88,9 @@ def test_rss_and_token(client, live_server):
assert b"Access denied, bad token" not in res.data assert b"Access denied, bad token" not in res.data
assert b"Random content" in res.data assert b"Random content" in res.data
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
def test_basic_cdata_rss_markup(client, live_server): def test_basic_cdata_rss_markup(client, live_server, measure_memory_usage):
#live_server_setup(live_server) #live_server_setup(live_server)
set_original_cdata_xml() set_original_cdata_xml()
@@ -117,7 +118,7 @@ def test_basic_cdata_rss_markup(client, live_server):
assert b'The days of Terminator' in res.data assert b'The days of Terminator' in res.data
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
def test_rss_xpath_filtering(client, live_server): def test_rss_xpath_filtering(client, live_server, measure_memory_usage):
#live_server_setup(live_server) #live_server_setup(live_server)
set_original_cdata_xml() set_original_cdata_xml()

View File

@@ -5,7 +5,7 @@ import time
def test_setup(live_server): def test_setup(live_server):
live_server_setup(live_server) live_server_setup(live_server)
def test_basic_search(client, live_server): def test_basic_search(client, live_server, measure_memory_usage):
#live_server_setup(live_server) #live_server_setup(live_server)
urls = ['https://localhost:12300?first-result=1', urls = ['https://localhost:12300?first-result=1',
@@ -38,7 +38,7 @@ def test_basic_search(client, live_server):
assert urls[1].encode('utf-8') not in res.data assert urls[1].encode('utf-8') not in res.data
def test_search_in_tag_limit(client, live_server): def test_search_in_tag_limit(client, live_server, measure_memory_usage):
#live_server_setup(live_server) #live_server_setup(live_server)
urls = ['https://localhost:12300?first-result=1 tag-one', urls = ['https://localhost:12300?first-result=1 tag-one',

View File

@@ -2,10 +2,10 @@ from flask import url_for
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
import time import time
def test_setup(client, live_server): def test_setup(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)
def test_bad_access(client, live_server): def test_bad_access(client, live_server, measure_memory_usage):
#live_server_setup(live_server) #live_server_setup(live_server)
res = client.post( res = client.post(
url_for("import_page"), url_for("import_page"),
@@ -67,7 +67,7 @@ def test_bad_access(client, live_server):
assert b'file:// type access is denied for security reasons.' in res.data assert b'file:// type access is denied for security reasons.' in res.data
def test_xss(client, live_server): def test_xss(client, live_server, measure_memory_usage):
#live_server_setup(live_server) #live_server_setup(live_server)
from changedetectionio.notification import ( from changedetectionio.notification import (
default_notification_format default_notification_format

View File

@@ -9,7 +9,7 @@ import re
sleep_time_for_fetch_thread = 3 sleep_time_for_fetch_thread = 3
def test_share_watch(client, live_server): def test_share_watch(client, live_server, measure_memory_usage):
set_original_response() set_original_response()
live_server_setup(live_server) live_server_setup(live_server)

View File

@@ -10,7 +10,7 @@ sleep_time_for_fetch_thread = 3
def test_setup(live_server): def test_setup(live_server):
live_server_setup(live_server) live_server_setup(live_server)
def test_check_basic_change_detection_functionality_source(client, live_server): def test_check_basic_change_detection_functionality_source(client, live_server, measure_memory_usage):
set_original_response() set_original_response()
test_url = 'source:'+url_for('test_endpoint', _external=True) test_url = 'source:'+url_for('test_endpoint', _external=True)
# Add our URL to the import page # Add our URL to the import page
@@ -58,7 +58,7 @@ def test_check_basic_change_detection_functionality_source(client, live_server):
# `subtractive_selectors` should still work in `source:` type requests # `subtractive_selectors` should still work in `source:` type requests
def test_check_ignore_elements(client, live_server): def test_check_ignore_elements(client, live_server, measure_memory_usage):
set_original_response() set_original_response()
time.sleep(1) time.sleep(1)
test_url = 'source:'+url_for('test_endpoint', _external=True) test_url = 'source:'+url_for('test_endpoint', _external=True)

View File

@@ -55,7 +55,7 @@ def set_modified_with_trigger_text_response():
f.write(test_return_data) f.write(test_return_data)
def test_trigger_functionality(client, live_server): def test_trigger_functionality(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)

View File

@@ -22,7 +22,7 @@ def set_original_ignore_response():
def test_trigger_regex_functionality(client, live_server): def test_trigger_regex_functionality(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)

View File

@@ -22,7 +22,7 @@ def set_original_ignore_response():
def test_trigger_regex_functionality_with_filter(client, live_server): def test_trigger_regex_functionality_with_filter(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)
sleep_time_for_fetch_thread = 3 sleep_time_for_fetch_thread = 3

View File

@@ -66,10 +66,10 @@ def set_modified_with_trigger_text_response():
with open("test-datastore/endpoint-content.txt", "w") as f: with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data) f.write(test_return_data)
def test_setup(client, live_server): def test_setup(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)
def test_unique_lines_functionality(client, live_server): def test_unique_lines_functionality(client, live_server, measure_memory_usage):
#live_server_setup(live_server) #live_server_setup(live_server)
@@ -118,7 +118,7 @@ def test_unique_lines_functionality(client, live_server):
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data assert b'Deleted' in res.data
def test_sort_lines_functionality(client, live_server): def test_sort_lines_functionality(client, live_server, measure_memory_usage):
#live_server_setup(live_server) #live_server_setup(live_server)
set_modified_swapped_lines_with_extra_text_for_sorting() set_modified_swapped_lines_with_extra_text_for_sorting()

View File

@@ -4,7 +4,7 @@ from urllib.request import urlopen
from . util import set_original_response, set_modified_response, live_server_setup from . util import set_original_response, set_modified_response, live_server_setup
def test_check_watch_field_storage(client, live_server): def test_check_watch_field_storage(client, live_server, measure_memory_usage):
set_original_response() set_original_response()
live_server_setup(live_server) live_server_setup(live_server)

View File

@@ -49,7 +49,7 @@ def set_modified_response():
# Handle utf-8 charset replies https://github.com/dgtlmoon/changedetection.io/pull/613 # Handle utf-8 charset replies https://github.com/dgtlmoon/changedetection.io/pull/613
def test_check_xpath_filter_utf8(client, live_server): def test_check_xpath_filter_utf8(client, live_server, measure_memory_usage):
filter = '//item/*[self::description]' filter = '//item/*[self::description]'
d = '''<?xml version="1.0" encoding="UTF-8"?> d = '''<?xml version="1.0" encoding="UTF-8"?>
@@ -105,7 +105,7 @@ def test_check_xpath_filter_utf8(client, live_server):
# Handle utf-8 charset replies https://github.com/dgtlmoon/changedetection.io/pull/613 # Handle utf-8 charset replies https://github.com/dgtlmoon/changedetection.io/pull/613
def test_check_xpath_text_function_utf8(client, live_server): def test_check_xpath_text_function_utf8(client, live_server, measure_memory_usage):
filter = '//item/title/text()' filter = '//item/title/text()'
d = '''<?xml version="1.0" encoding="UTF-8"?> d = '''<?xml version="1.0" encoding="UTF-8"?>
@@ -168,7 +168,7 @@ def test_check_xpath_text_function_utf8(client, live_server):
assert b'Deleted' in res.data assert b'Deleted' in res.data
def test_check_markup_xpath_filter_restriction(client, live_server): def test_check_markup_xpath_filter_restriction(client, live_server, measure_memory_usage):
xpath_filter = "//*[contains(@class, 'sametext')]" xpath_filter = "//*[contains(@class, 'sametext')]"
set_original_response() set_original_response()
@@ -214,7 +214,7 @@ def test_check_markup_xpath_filter_restriction(client, live_server):
assert b'Deleted' in res.data assert b'Deleted' in res.data
def test_xpath_validation(client, live_server): def test_xpath_validation(client, live_server, measure_memory_usage):
# Add our URL to the import page # Add our URL to the import page
test_url = url_for('test_endpoint', _external=True) test_url = url_for('test_endpoint', _external=True)
res = client.post( res = client.post(
@@ -235,7 +235,7 @@ def test_xpath_validation(client, live_server):
assert b'Deleted' in res.data assert b'Deleted' in res.data
def test_xpath23_prefix_validation(client, live_server): def test_xpath23_prefix_validation(client, live_server, measure_memory_usage):
# Add our URL to the import page # Add our URL to the import page
test_url = url_for('test_endpoint', _external=True) test_url = url_for('test_endpoint', _external=True)
res = client.post( res = client.post(
@@ -255,7 +255,7 @@ def test_xpath23_prefix_validation(client, live_server):
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data assert b'Deleted' in res.data
def test_xpath1_lxml(client, live_server): def test_xpath1_lxml(client, live_server, measure_memory_usage):
#live_server_setup(live_server) #live_server_setup(live_server)
d = '''<?xml version="1.0" encoding="UTF-8"?> d = '''<?xml version="1.0" encoding="UTF-8"?>
@@ -319,7 +319,7 @@ def test_xpath1_lxml(client, live_server):
##### #####
def test_xpath1_validation(client, live_server): def test_xpath1_validation(client, live_server, measure_memory_usage):
# Add our URL to the import page # Add our URL to the import page
test_url = url_for('test_endpoint', _external=True) test_url = url_for('test_endpoint', _external=True)
res = client.post( res = client.post(
@@ -341,7 +341,7 @@ def test_xpath1_validation(client, live_server):
# actually only really used by the distll.io importer, but could be handy too # actually only really used by the distll.io importer, but could be handy too
def test_check_with_prefix_include_filters(client, live_server): def test_check_with_prefix_include_filters(client, live_server, measure_memory_usage):
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data assert b'Deleted' in res.data
@@ -378,7 +378,7 @@ def test_check_with_prefix_include_filters(client, live_server):
client.get(url_for("form_delete", uuid="all"), follow_redirects=True) client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
def test_various_rules(client, live_server): def test_various_rules(client, live_server, measure_memory_usage):
# Just check these don't error # Just check these don't error
# live_server_setup(live_server) # live_server_setup(live_server)
with open("test-datastore/endpoint-content.txt", "w") as f: with open("test-datastore/endpoint-content.txt", "w") as f:
@@ -426,7 +426,7 @@ def test_various_rules(client, live_server):
assert b'Deleted' in res.data assert b'Deleted' in res.data
def test_xpath_20(client, live_server): def test_xpath_20(client, live_server, measure_memory_usage):
test_url = url_for('test_endpoint', _external=True) test_url = url_for('test_endpoint', _external=True)
res = client.post( res = client.post(
url_for("import_page"), url_for("import_page"),
@@ -463,7 +463,7 @@ def test_xpath_20(client, live_server):
client.get(url_for("form_delete", uuid="all"), follow_redirects=True) client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
def test_xpath_20_function_count(client, live_server): def test_xpath_20_function_count(client, live_server, measure_memory_usage):
set_original_response() set_original_response()
# Add our URL to the import page # Add our URL to the import page
@@ -499,7 +499,7 @@ def test_xpath_20_function_count(client, live_server):
client.get(url_for("form_delete", uuid="all"), follow_redirects=True) client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
def test_xpath_20_function_count2(client, live_server): def test_xpath_20_function_count2(client, live_server, measure_memory_usage):
set_original_response() set_original_response()
# Add our URL to the import page # Add our URL to the import page
@@ -535,7 +535,7 @@ def test_xpath_20_function_count2(client, live_server):
client.get(url_for("form_delete", uuid="all"), follow_redirects=True) client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
def test_xpath_20_function_string_join_matches(client, live_server): def test_xpath_20_function_string_join_matches(client, live_server, measure_memory_usage):
set_original_response() set_original_response()
# Add our URL to the import page # Add our URL to the import page

View File

@@ -4,12 +4,12 @@ import os
from flask import url_for from flask import url_for
from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
def test_setup(client, live_server): def test_setup(client, live_server, measure_memory_usage):
live_server_setup(live_server) live_server_setup(live_server)
# Add a site in paused mode, add an invalid filter, we should still have visual selector data ready # Add a site in paused mode, add an invalid filter, we should still have visual selector data ready
def test_visual_selector_content_ready(client, live_server): def test_visual_selector_content_ready(client, live_server, measure_memory_usage):
import os import os
import json import json
@@ -79,7 +79,7 @@ def test_visual_selector_content_ready(client, live_server):
follow_redirects=True follow_redirects=True
) )
def test_basic_browserstep(client, live_server): def test_basic_browserstep(client, live_server, measure_memory_usage):
#live_server_setup(live_server) #live_server_setup(live_server)
assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test" assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test"

View File

@@ -1,11 +1,12 @@
import os
import threading
import queue
import time
from . import content_fetchers from . import content_fetchers
from changedetectionio import html_tools
from .processors.text_json_diff import FilterNotFoundInResponse
from .processors.restock_diff import UnableToExtractRestockData from .processors.restock_diff import UnableToExtractRestockData
from .processors.text_json_diff import FilterNotFoundInResponse
from changedetectionio import html_tools
from copy import deepcopy
import os
import queue
import threading
import time
# A single update worker # A single update worker
# #
@@ -245,14 +246,18 @@ class update_worker(threading.Thread):
contents = b'' contents = b''
process_changedetection_results = True process_changedetection_results = True
update_obj = {} update_obj = {}
logger.info(f"Processing watch UUID {uuid} "
f"Priority {queued_item_data.priority} " # Clear last errors (move to preflight func?)
f"URL {self.datastore.data['watching'][uuid]['url']}") self.datastore.data['watching'][uuid]['browser_steps_last_error_step'] = None
watch = self.datastore.data['watching'].get(uuid)
logger.info(f"Processing watch UUID {uuid} Priority {queued_item_data.priority} URL {watch['url']}")
now = time.time() now = time.time()
try: try:
# Processor is what we are using for detecting the "Change" # Processor is what we are using for detecting the "Change"
processor = self.datastore.data['watching'][uuid].get('processor', 'text_json_diff') processor = watch.get('processor', 'text_json_diff')
# if system... # if system...
# Abort processing when the content was the same as the last fetch # Abort processing when the content was the same as the last fetch
@@ -272,14 +277,12 @@ class update_worker(threading.Thread):
watch_uuid=uuid watch_uuid=uuid
) )
# Clear last errors (move to preflight func?)
self.datastore.data['watching'][uuid]['browser_steps_last_error_step'] = None
update_handler.call_browser() update_handler.call_browser()
changed_detected, update_obj, contents = update_handler.run_changedetection(uuid, changed_detected, update_obj, contents = update_handler.run_changedetection(
skip_when_checksum_same=skip_when_same_checksum, watch=watch,
) skip_when_checksum_same=skip_when_same_checksum,
)
# Re #342 # Re #342
# In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes. # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
@@ -309,7 +312,11 @@ class update_worker(threading.Thread):
}) })
if e.screenshot: if e.screenshot:
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot) watch.save_screenshot(screenshot=e.screenshot, as_error=True)
if e.xpath_data:
watch.save_xpath_data(data=e.xpath_data)
process_changedetection_results = False process_changedetection_results = False
except content_fetchers.exceptions.Non200ErrorCodeReceived as e: except content_fetchers.exceptions.Non200ErrorCodeReceived as e:
@@ -325,11 +332,11 @@ class update_worker(threading.Thread):
err_text = "Error - Request returned a HTTP error code {}".format(str(e.status_code)) err_text = "Error - Request returned a HTTP error code {}".format(str(e.status_code))
if e.screenshot: if e.screenshot:
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True) watch.save_screenshot(screenshot=e.screenshot, as_error=True)
if e.xpath_data: if e.xpath_data:
self.datastore.save_xpath_data(watch_uuid=uuid, data=e.xpath_data, as_error=True) watch.save_xpath_data(data=e.xpath_data, as_error=True)
if e.page_text: if e.page_text:
self.datastore.save_error_text(watch_uuid=uuid, contents=e.page_text) watch.save_error_text(contents=e.page_text)
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text}) self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
process_changedetection_results = False process_changedetection_results = False
@@ -341,16 +348,23 @@ class update_worker(threading.Thread):
err_text = "Warning, no filters were found, no change detection ran - Did the page change layout? update your Visual Filter if necessary." err_text = "Warning, no filters were found, no change detection ran - Did the page change layout? update your Visual Filter if necessary."
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text}) self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
# Filter wasnt found, but we should still update the visual selector so that they can have a chance to set it up again
if e.screenshot:
watch.save_screenshot(screenshot=e.screenshot)
if e.xpath_data:
watch.save_xpath_data(data=e.xpath_data)
# Only when enabled, send the notification # Only when enabled, send the notification
if self.datastore.data['watching'][uuid].get('filter_failure_notification_send', False): if watch.get('filter_failure_notification_send', False):
c = self.datastore.data['watching'][uuid].get('consecutive_filter_failures', 5) c = watch.get('consecutive_filter_failures', 5)
c += 1 c += 1
# Send notification if we reached the threshold? # Send notification if we reached the threshold?
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
0) 0)
logger.error(f"Filter for {uuid} not found, consecutive_filter_failures: {c}") logger.warning(f"Filter for {uuid} not found, consecutive_filter_failures: {c}")
if threshold > 0 and c >= threshold: if threshold > 0 and c >= threshold:
if not self.datastore.data['watching'][uuid].get('notification_muted'): if not watch.get('notification_muted'):
self.send_filter_failure_notification(uuid) self.send_filter_failure_notification(uuid)
c = 0 c = 0
@@ -362,7 +376,6 @@ class update_worker(threading.Thread):
# Yes fine, so nothing todo, don't continue to process. # Yes fine, so nothing todo, don't continue to process.
process_changedetection_results = False process_changedetection_results = False
changed_detected = False changed_detected = False
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': False})
except content_fetchers.exceptions.BrowserConnectError as e: except content_fetchers.exceptions.BrowserConnectError as e:
self.datastore.update_watch(uuid=uuid, self.datastore.update_watch(uuid=uuid,
update_obj={'last_error': e.msg update_obj={'last_error': e.msg
@@ -401,15 +414,15 @@ class update_worker(threading.Thread):
} }
) )
if self.datastore.data['watching'][uuid].get('filter_failure_notification_send', False): if watch.get('filter_failure_notification_send', False):
c = self.datastore.data['watching'][uuid].get('consecutive_filter_failures', 5) c = watch.get('consecutive_filter_failures', 5)
c += 1 c += 1
# Send notification if we reached the threshold? # Send notification if we reached the threshold?
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
0) 0)
logger.error(f"Step for {uuid} not found, consecutive_filter_failures: {c}") logger.error(f"Step for {uuid} not found, consecutive_filter_failures: {c}")
if threshold > 0 and c >= threshold: if threshold > 0 and c >= threshold:
if not self.datastore.data['watching'][uuid].get('notification_muted'): if not watch.get('notification_muted'):
self.send_step_failure_notification(watch_uuid=uuid, step_n=e.step_n) self.send_step_failure_notification(watch_uuid=uuid, step_n=e.step_n)
c = 0 c = 0
@@ -431,7 +444,7 @@ class update_worker(threading.Thread):
except content_fetchers.exceptions.JSActionExceptions as e: except content_fetchers.exceptions.JSActionExceptions as e:
err_text = "Error running JS Actions - Page request - "+e.message err_text = "Error running JS Actions - Page request - "+e.message
if e.screenshot: if e.screenshot:
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True) watch.save_screenshot(screenshot=e.screenshot, as_error=True)
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code}) 'last_check_status': e.status_code})
process_changedetection_results = False process_changedetection_results = False
@@ -441,7 +454,7 @@ class update_worker(threading.Thread):
err_text = "{} - {}".format(err_text, e.message) err_text = "{} - {}".format(err_text, e.message)
if e.screenshot: if e.screenshot:
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True) watch.save_screenshot(screenshot=e.screenshot, as_error=True)
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code, 'last_check_status': e.status_code,
@@ -465,8 +478,6 @@ class update_worker(threading.Thread):
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Exception: " + str(e)}) self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Exception: " + str(e)})
# Other serious error # Other serious error
process_changedetection_results = False process_changedetection_results = False
# import traceback
# print(traceback.format_exc())
else: else:
# Crash protection, the watch entry could have been removed by this point (during a slow chrome fetch etc) # Crash protection, the watch entry could have been removed by this point (during a slow chrome fetch etc)
@@ -474,7 +485,7 @@ class update_worker(threading.Thread):
continue continue
# Mark that we never had any failures # Mark that we never had any failures
if not self.datastore.data['watching'][uuid].get('ignore_status_codes'): if not watch.get('ignore_status_codes'):
update_obj['consecutive_filter_failures'] = 0 update_obj['consecutive_filter_failures'] = 0
# Everything ran OK, clean off any previous error # Everything ran OK, clean off any previous error
@@ -482,25 +493,48 @@ class update_worker(threading.Thread):
self.cleanup_error_artifacts(uuid) self.cleanup_error_artifacts(uuid)
if not self.datastore.data['watching'].get(uuid):
continue
# #
# Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc # Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc
if process_changedetection_results: if process_changedetection_results:
# Always save the screenshot if it's available
if update_handler.screenshot:
watch.save_screenshot(screenshot=update_handler.screenshot)
if update_handler.xpath_data:
watch.save_xpath_data(data=update_handler.xpath_data)
try: try:
watch = self.datastore.data['watching'].get(uuid)
self.datastore.update_watch(uuid=uuid, update_obj=update_obj) self.datastore.update_watch(uuid=uuid, update_obj=update_obj)
# Also save the snapshot on the first time checked # Also save the snapshot on the first time checked
if changed_detected or not watch['last_checked']: if changed_detected or not watch.get('last_checked'):
timestamp = round(time.time())
# Small hack so that we sleep just enough to allow 1 second between history snapshots
# this is because history.txt indexes/keys snapshots by epoch seconds and we dont want dupe keys
if watch.newest_history_key and int(timestamp) == int(watch.newest_history_key):
logger.warning(
f"Timestamp {timestamp} already exists, waiting 1 seconds so we have a unique key in history.txt")
timestamp = str(int(timestamp) + 1)
time.sleep(1)
watch.save_history_text(contents=contents, watch.save_history_text(contents=contents,
timestamp=str(round(time.time())), timestamp=timestamp,
snapshot_id=update_obj.get('previous_md5', 'none')) snapshot_id=update_obj.get('previous_md5', 'none'))
if update_handler.fetcher.content:
watch.save_last_fetched_html(contents=update_handler.fetcher.content, timestamp=timestamp)
# A change was detected # A change was detected
if changed_detected: if changed_detected:
# Notifications should only trigger on the second time (first time, we gather the initial snapshot) # Notifications should only trigger on the second time (first time, we gather the initial snapshot)
if watch.history_n >= 2: if watch.history_n >= 2:
logger.info(f"Change detected in UUID {uuid} - {watch['url']}") logger.info(f"Change detected in UUID {uuid} - {watch['url']}")
if not self.datastore.data['watching'][uuid].get('notification_muted'): if not watch.get('notification_muted'):
self.send_content_changed_notification(watch_uuid=uuid) self.send_content_changed_notification(watch_uuid=uuid)
else: else:
logger.info(f"Change triggered in UUID {uuid} due to first history saving (no notifications sent) - {watch['url']}") logger.info(f"Change triggered in UUID {uuid} due to first history saving (no notifications sent) - {watch['url']}")
@@ -511,29 +545,23 @@ class update_worker(threading.Thread):
logger.critical(str(e)) logger.critical(str(e))
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)}) self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
if self.datastore.data['watching'].get(uuid):
# Always record that we atleast tried
count = self.datastore.data['watching'][uuid].get('check_count', 0) + 1
# Record the 'server' header reply, can be used for actions in the future like cloudflare/akamai workarounds # Always record that we atleast tried
try: count = watch.get('check_count', 0) + 1
server_header = update_handler.fetcher.headers.get('server', '').strip().lower()[:255]
self.datastore.update_watch(uuid=uuid,
update_obj={'remote_server_reply': server_header}
)
except Exception as e:
pass
self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3), # Record the 'server' header reply, can be used for actions in the future like cloudflare/akamai workarounds
'last_checked': round(time.time()), try:
'check_count': count server_header = update_handler.fetcher.headers.get('server', '').strip().lower()[:255]
}) self.datastore.update_watch(uuid=uuid,
update_obj={'remote_server_reply': server_header}
)
except Exception as e:
pass
# Always save the screenshot if it's available self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
if update_handler.screenshot: 'last_checked': round(time.time()),
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=update_handler.screenshot) 'check_count': count
if update_handler.xpath_data: })
self.datastore.save_xpath_data(watch_uuid=uuid, data=update_handler.xpath_data)
self.current_uuid = None # Done self.current_uuid = None # Done

View File

@@ -68,9 +68,10 @@ services:
# If WEBDRIVER or PLAYWRIGHT are enabled, changedetection container depends on that # If WEBDRIVER or PLAYWRIGHT are enabled, changedetection container depends on that
# and must wait before starting (substitute "browser-chrome" with "playwright-chrome" if last one is used) # and must wait before starting (substitute "browser-chrome" with "playwright-chrome" if last one is used)
# depends_on: # depends_on:
# browser-chrome: # playwright-chrome:
# condition: service_started # condition: service_started
# Used for fetching pages via Playwright+Chrome where you need Javascript support. # Used for fetching pages via Playwright+Chrome where you need Javascript support.
# RECOMMENDED FOR FETCHING PAGES WITH CHROME # RECOMMENDED FOR FETCHING PAGES WITH CHROME

View File

@@ -1,7 +1,7 @@
# Used by Pyppeteer # Used by Pyppeteer
pyee pyee
eventlet==0.35.2 # related to dnspython fixes eventlet>=0.36.1 # fixes SSL error on Python 3.12
feedgen~=0.9 feedgen~=0.9
flask-compress flask-compress
# 0.6.3 included compatibility fix for werkzeug 3.x (2.x had deprecation of url handlers) # 0.6.3 included compatibility fix for werkzeug 3.x (2.x had deprecation of url handlers)
@@ -23,13 +23,13 @@ validators~=0.21
brotli~=1.0 brotli~=1.0
requests[socks] requests[socks]
urllib3==1.26.18 urllib3==1.26.19
chardet>2.3.0 chardet>2.3.0
wtforms~=3.0 wtforms~=3.0
jsonpath-ng~=1.5.3 jsonpath-ng~=1.5.3
dnspython==2.6.1 dnspython==2.6.1 # related to eventlet fixes
# jq not available on Windows so must be installed manually # jq not available on Windows so must be installed manually
@@ -41,10 +41,8 @@ apprise~=1.8.0
# use v1.x due to https://github.com/eclipse/paho.mqtt.python/issues/814 # use v1.x due to https://github.com/eclipse/paho.mqtt.python/issues/814
paho-mqtt>=1.6.1,<2.0.0 paho-mqtt>=1.6.1,<2.0.0
# This mainly affects some ARM builds, which unlike the other builds ignores "ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1" # Requires extra wheel for rPi
# so without this pinning, the newer versions on ARM will forcefully try to build rust, which results in "rust compiler not found" cryptography~=42.0.8
# (introduced once apprise became a dep)
cryptography~=3.4
# Used for CSS filtering # Used for CSS filtering
beautifulsoup4 beautifulsoup4
@@ -85,4 +83,4 @@ jsonschema==4.17.3
loguru loguru
# Needed for > 3.10, https://github.com/microsoft/playwright-python/issues/2096 # Needed for > 3.10, https://github.com/microsoft/playwright-python/issues/2096
greenlet >= 3.0.3 greenlet >= 3.0.3