Compare commits

..

3 Commits

Author SHA1 Message Date
dgtlmoon
eb4cd35317 bump test
Some checks failed
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built 📦 package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
2025-04-25 18:45:21 +02:00
dgtlmoon
dc75043562 Improve test coverage for source: 2025-04-22 18:55:24 +02:00
dgtlmoon
38fffda890 Global ignore check - add regex 2025-04-22 18:42:11 +02:00
9 changed files with 32 additions and 170 deletions

View File

@@ -5,7 +5,7 @@ from json_logic.builtins import BUILTINS
from .exceptions import EmptyConditionRuleRowNotUsable from .exceptions import EmptyConditionRuleRowNotUsable
from .pluggy_interface import plugin_manager # Import the pluggy plugin manager from .pluggy_interface import plugin_manager # Import the pluggy plugin manager
from . import default_plugin from . import default_plugin
from loguru import logger
# List of all supported JSON Logic operators # List of all supported JSON Logic operators
operator_choices = [ operator_choices = [
(None, "Choose one - Operator"), (None, "Choose one - Operator"),
@@ -113,14 +113,12 @@ def execute_ruleset_against_all_plugins(current_watch_uuid: str, application_dat
application_datastruct=application_datastruct, application_datastruct=application_datastruct,
ephemeral_data=ephemeral_data ephemeral_data=ephemeral_data
) )
logger.debug(f"Trying plugin {plugin}....")
# Set a timeout of 10 seconds # Set a timeout of 10 seconds
try: try:
new_execute_data = future.result(timeout=10) new_execute_data = future.result(timeout=10)
if new_execute_data and isinstance(new_execute_data, dict): if new_execute_data and isinstance(new_execute_data, dict):
EXECUTE_DATA.update(new_execute_data) EXECUTE_DATA.update(new_execute_data)
except concurrent.futures.TimeoutError: except concurrent.futures.TimeoutError:
# The plugin took too long, abort processing for this watch # The plugin took too long, abort processing for this watch
raise Exception(f"Plugin {plugin.__class__.__name__} took more than 10 seconds to run.") raise Exception(f"Plugin {plugin.__class__.__name__} took more than 10 seconds to run.")

View File

@@ -9,20 +9,15 @@ def levenshtein_ratio_recent_history(watch, incoming_text=None):
try: try:
from Levenshtein import ratio, distance from Levenshtein import ratio, distance
k = list(watch.history.keys()) k = list(watch.history.keys())
a = None if len(k) >= 2:
b = None # When called from ui_edit_stats_extras, we don't have incoming_text
if incoming_text is None:
# When called from ui_edit_stats_extras, we don't have incoming_text a = watch.get_history_snapshot(timestamp=k[-1]) # Latest snapshot
if incoming_text is None: b = watch.get_history_snapshot(timestamp=k[-2]) # Previous snapshot
a = watch.get_history_snapshot(timestamp=k[-1]) # Latest snapshot else:
b = watch.get_history_snapshot(timestamp=k[-2]) # Previous snapshot a = watch.get_history_snapshot(timestamp=k[-2]) # Second newest, incoming_text will be "newest"
b = incoming_text
# Needs atleast one snapshot
elif len(k) >= 1: # Should be atleast one snapshot to compare against
a = watch.get_history_snapshot(timestamp=k[-1]) # Latest saved snapshot
b = incoming_text if incoming_text else k[-2]
if a and b:
distance_value = distance(a, b) distance_value = distance(a, b)
ratio_value = ratio(a, b) ratio_value = ratio(a, b)
return { return {
@@ -58,7 +53,7 @@ def add_data(current_watch_uuid, application_datastruct, ephemeral_data):
# ephemeral_data['text'] will be the current text after filters, they may have edited filters but not saved them yet etc # ephemeral_data['text'] will be the current text after filters, they may have edited filters but not saved them yet etc
if watch and 'text' in ephemeral_data: if watch and 'text' in ephemeral_data:
lev_data = levenshtein_ratio_recent_history(watch, ephemeral_data.get('text','')) lev_data = levenshtein_ratio_recent_history(watch, ephemeral_data['text'])
if isinstance(lev_data, dict): if isinstance(lev_data, dict):
res['levenshtein_ratio'] = lev_data.get('ratio', 0) res['levenshtein_ratio'] = lev_data.get('ratio', 0)
res['levenshtein_similarity'] = lev_data.get('percent_similar', 0) res['levenshtein_similarity'] = lev_data.get('percent_similar', 0)

View File

@@ -147,7 +147,7 @@ class fetcher(Fetcher):
is_binary, is_binary,
empty_pages_are_a_change empty_pages_are_a_change
): ):
import re
self.delete_browser_steps_screenshots() self.delete_browser_steps_screenshots()
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
@@ -172,17 +172,6 @@ class fetcher(Fetcher):
# headless - ask a new page # headless - ask a new page
self.page = (pages := await browser.pages) and len(pages) or await browser.newPage() self.page = (pages := await browser.pages) and len(pages) or await browser.newPage()
if '--window-size' in self.browser_connection_url:
# Be sure the viewport is always the window-size, this is often not the same thing
match = re.search(r'--window-size=(\d+),(\d+)', self.browser_connection_url)
if match:
logger.debug(f"Setting viewport to same as --window-size in browser connection URL {int(match.group(1))},{int(match.group(2))}")
await self.page.setViewport({
"width": int(match.group(1)),
"height": int(match.group(2))
})
logger.debug(f"Puppeteer viewport size {self.page.viewport}")
try: try:
from pyppeteerstealth import inject_evasions_into_page from pyppeteerstealth import inject_evasions_into_page
except ImportError: except ImportError:
@@ -229,6 +218,7 @@ class fetcher(Fetcher):
response = await self.page.goto(url, waitUntil="load") response = await self.page.goto(url, waitUntil="load")
if response is None: if response is None:
await self.page.close() await self.page.close()
await browser.close() await browser.close()

View File

@@ -51,7 +51,6 @@ async () => {
'niet op voorraad', 'niet op voorraad',
'no disponible', 'no disponible',
'no featured offers available', 'no featured offers available',
'no longer available',
'no longer in stock', 'no longer in stock',
'no tickets available', 'no tickets available',
'non disponibile', 'non disponibile',
@@ -126,20 +125,6 @@ async () => {
// so it's good to filter to just the 'above the fold' elements // so it's good to filter to just the 'above the fold' elements
// and it should be atleast 100px from the top to ignore items in the toolbar, sometimes menu items like "Coming soon" exist // and it should be atleast 100px from the top to ignore items in the toolbar, sometimes menu items like "Coming soon" exist
function elementIsInEyeBallRange(element) {
// outside the 'fold' or some weird text in the heading area
// .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
// Note: theres also an automated test that places the 'out of stock' text fairly low down
// Skip text that could be in the header area
if (element.getBoundingClientRect().bottom + window.scrollY <= 300 ) {
return false;
}
// Skip text that could be much further down (like a list of "you may like" products that have 'sold out' in there
if (element.getBoundingClientRect().bottom + window.scrollY >= 1300 ) {
return false;
}
return true;
}
// @todo - if it's SVG or IMG, go into image diff mode // @todo - if it's SVG or IMG, go into image diff mode
@@ -176,7 +161,9 @@ async () => {
for (let i = elementsToScan.length - 1; i >= 0; i--) { for (let i = elementsToScan.length - 1; i >= 0; i--) {
const element = elementsToScan[i]; const element = elementsToScan[i];
if (!elementIsInEyeBallRange(element)) { // outside the 'fold' or some weird text in the heading area
// .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
if (element.getBoundingClientRect().top + window.scrollY >= vh || element.getBoundingClientRect().top + window.scrollY <= 100) {
continue continue
} }
@@ -190,11 +177,11 @@ async () => {
} catch (e) { } catch (e) {
console.warn('stock-not-in-stock.js scraper - handling element for gettext failed', e); console.warn('stock-not-in-stock.js scraper - handling element for gettext failed', e);
} }
if (elementText.length) { if (elementText.length) {
// try which ones could mean its in stock // try which ones could mean its in stock
if (negateOutOfStockRegex.test(elementText) && !elementText.includes('(0 products)')) { if (negateOutOfStockRegex.test(elementText) && !elementText.includes('(0 products)')) {
console.log(`Negating/overriding 'Out of Stock' back to "Possibly in stock" found "${elementText}"`) console.log(`Negating/overriding 'Out of Stock' back to "Possibly in stock" found "${elementText}"`)
element.style.border = "2px solid green"; // highlight the element that was detected as in stock
return 'Possibly in stock'; return 'Possibly in stock';
} }
} }
@@ -203,8 +190,10 @@ async () => {
// OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK // OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK
for (let i = elementsToScan.length - 1; i >= 0; i--) { for (let i = elementsToScan.length - 1; i >= 0; i--) {
const element = elementsToScan[i]; const element = elementsToScan[i];
// outside the 'fold' or some weird text in the heading area
if (!elementIsInEyeBallRange(element)) { // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
// Note: theres also an automated test that places the 'out of stock' text fairly low down
if (element.getBoundingClientRect().top + window.scrollY >= vh + 250 || element.getBoundingClientRect().top + window.scrollY <= 100) {
continue continue
} }
elementText = ""; elementText = "";
@@ -219,7 +208,6 @@ async () => {
for (const outOfStockText of outOfStockTexts) { for (const outOfStockText of outOfStockTexts) {
if (elementText.includes(outOfStockText)) { if (elementText.includes(outOfStockText)) {
console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}" - offset top ${element.getBoundingClientRect().top}, page height is ${vh}`) console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}" - offset top ${element.getBoundingClientRect().top}, page height is ${vh}`)
element.style.border = "2px solid red"; // highlight the element that was detected as out of stock
return outOfStockText; // item is out of stock return outOfStockText; // item is out of stock
} }
} }

View File

@@ -202,6 +202,7 @@ async (options) => {
// Foreach filter, go and find it on the page and add it to the results so we can visualise it again // Foreach filter, go and find it on the page and add it to the results so we can visualise it again
for (const f of include_filters) { for (const f of include_filters) {
bbox = false; bbox = false;
q = false;
if (!f.length) { if (!f.length) {
console.log("xpath_element_scraper: Empty filter, skipping"); console.log("xpath_element_scraper: Empty filter, skipping");
@@ -254,7 +255,7 @@ async (options) => {
console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y) console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y)
} catch (e) { } catch (e) {
console.log(e) console.log(e)
console.log("xpath_element_scraper: error looking up node.ownerElement") console.log("xpath_element_scraper: error looking up q.ownerElement")
} }
} }

View File

@@ -14,8 +14,6 @@ from changedetectionio.notification import (
def set_original_response(): def set_original_response():
test_return_data = """<html> test_return_data = """<html>
<body> <body>
<section id=header style="padding: 50px; height: 350px">This is the header which should be ignored always - <span>add to cart</span></section>
<!-- stock-not-in-stock.js will ignore text in the first 300px, see elementIsInEyeBallRange(), sometimes "add to cart" and other junk is here -->
Some initial text<br> Some initial text<br>
<p>Which is across multiple lines</p> <p>Which is across multiple lines</p>
<br> <br>
@@ -54,6 +52,8 @@ def test_restock_detection(client, live_server, measure_memory_usage):
set_original_response() set_original_response()
#assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test" #assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test"
time.sleep(1)
live_server_setup(live_server) live_server_setup(live_server)
##################### #####################
notification_url = url_for('test_notification_endpoint', _external=True).replace('http://localhost', 'http://changedet').replace('http', 'json') notification_url = url_for('test_notification_endpoint', _external=True).replace('http://localhost', 'http://changedet').replace('http', 'json')
@@ -84,8 +84,7 @@ def test_restock_detection(client, live_server, measure_memory_usage):
# Is it correctly show as NOT in stock? # Is it correctly show as NOT in stock?
wait_for_all_checks(client) wait_for_all_checks(client)
res = client.get(url_for("watchlist.index")) res = client.get(url_for("watchlist.index"))
assert b'processor-restock_diff' in res.data # Should have saved in restock mode assert b'not-in-stock' in res.data
assert b'not-in-stock' in res.data # should be out of stock
# Is it correctly shown as in stock # Is it correctly shown as in stock
set_back_in_stock_response() set_back_in_stock_response()

View File

@@ -196,11 +196,7 @@ def test_condition_validate_rule_row(client, live_server):
) )
assert res.status_code == 200 assert res.status_code == 200
assert b'false' in res.data assert b'false' in res.data
# cleanup for the next
client.get(
url_for("ui.form_delete", uuid="all"),
follow_redirects=True
)
@@ -239,107 +235,4 @@ def test_wordcount_conditions_plugin(client, live_server, measure_memory_usage):
) )
# Assert the word count is counted correctly # Assert the word count is counted correctly
assert b'<td>13</td>' in res.data assert b'<td>13</td>' in res.data
# cleanup for the next
client.get(
url_for("ui.form_delete", uuid="all"),
follow_redirects=True
)
# If there was only a change in the whitespacing, then we shouldnt have a change detected
def test_lev_conditions_plugin(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write("""<html>
<body>
Some initial text<br>
<p>Which is across multiple lines</p>
<br>
So let's see what happens. <br>
</body>
</html>
""")
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
res = client.post(
url_for("ui.ui_views.form_quick_watch_add"),
data={"url": test_url, "tags": '', 'edit_and_watch_submit_button': 'Edit > Watch'},
follow_redirects=True
)
assert b"Watch added in Paused state, saving will unpause" in res.data
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
# Give the thread time to pick it up
wait_for_all_checks(client)
res = client.post(
url_for("ui.ui_edit.edit_page", uuid=uuid, unpause_on_save=1),
data={
"url": test_url,
"fetch_backend": "html_requests",
"conditions_match_logic": "ALL", # ALL = AND logic
"conditions-0-field": "levenshtein_ratio",
"conditions-0-operator": "<",
"conditions-0-value": "0.8" # needs to be more of a diff to trigger a change
},
follow_redirects=True
)
assert b"unpaused" in res.data
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' not in res.data
# Check the content saved initially, even tho a condition was set - this is the first snapshot so shouldnt be affected by conditions
res = client.get(
url_for("ui.ui_views.preview_page", uuid=uuid),
follow_redirects=True
)
assert b'Which is across multiple lines' in res.data
############### Now change it a LITTLE bit...
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write("""<html>
<body>
Some initial text<br>
<p>Which is across multiple lines</p>
<br>
So let's see what happenxxxxxxxxx. <br>
</body>
</html>
""")
res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
assert b'Queued 1 watch for rechecking.' in res.data
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' not in res.data #because this will be like 0.90 not 0.8 threshold
############### Now change it a MORE THAN 50%
test_return_data = """<html>
<body>
Some sxxxx<br>
<p>Which is across a lines</p>
<br>
ok. <br>
</body>
</html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
assert b'Queued 1 watch for rechecking.' in res.data
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' in res.data
# cleanup for the next
client.get(
url_for("ui.form_delete", uuid="all"),
follow_redirects=True
)

View File

@@ -72,7 +72,7 @@ services:
# Comment out ports: when using behind a reverse proxy , enable networks: etc. # Comment out ports: when using behind a reverse proxy , enable networks: etc.
ports: ports:
- 127.0.0.1:5000:5000 - 5000:5000
restart: unless-stopped restart: unless-stopped
# Used for fetching pages via WebDriver+Chrome where you need Javascript support. # Used for fetching pages via WebDriver+Chrome where you need Javascript support.
@@ -82,7 +82,7 @@ services:
# If WEBDRIVER or PLAYWRIGHT are enabled, changedetection container depends on that # If WEBDRIVER or PLAYWRIGHT are enabled, changedetection container depends on that
# and must wait before starting (substitute "browser-chrome" with "playwright-chrome" if last one is used) # and must wait before starting (substitute "browser-chrome" with "playwright-chrome" if last one is used)
# depends_on: # depends_on:
# browser-sockpuppet-chrome: # sockpuppetbrowser:
# condition: service_started # condition: service_started

View File

@@ -53,7 +53,7 @@ lxml >=4.8.0,<6,!=5.2.0,!=5.2.1
# XPath 2.0-3.1 support - 4.2.0 broke something? # XPath 2.0-3.1 support - 4.2.0 broke something?
elementpath==4.1.5 elementpath==4.1.5
selenium~=4.31.0 selenium~=4.14.0
# https://github.com/pallets/werkzeug/issues/2985 # https://github.com/pallets/werkzeug/issues/2985
# Maybe related to pytest? # Maybe related to pytest?
@@ -90,8 +90,6 @@ extruct
# For cleaning up unknown currency formats # For cleaning up unknown currency formats
babel babel
levenshtein
# Needed for > 3.10, https://github.com/microsoft/playwright-python/issues/2096 # Needed for > 3.10, https://github.com/microsoft/playwright-python/issues/2096
greenlet >= 3.0.3 greenlet >= 3.0.3