Compare commits

...

10 Commits

Author SHA1 Message Date
dgtlmoon
d0cbc6b08f test - also include header offset
Some checks failed
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built 📦 package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
2025-04-30 10:22:36 +02:00
dgtlmoon
62141e5032 Merge branch 'restock-price-scan-text-fix' of github.com:dgtlmoon/changedetection.io into restock-price-scan-text-fix 2025-04-30 10:14:25 +02:00
dgtlmoon
36806f9aae Bump test 2025-04-30 10:14:04 +02:00
dgtlmoon
fac247d419 Merge branch 'master' into restock-price-scan-text-fix 2025-04-29 17:58:53 +02:00
dgtlmoon
ed25d79ee1 Update stock-not-in-stock.js 2025-04-29 17:29:50 +02:00
dgtlmoon
fd7574d21b pyppeteer fast puppeteer fetch - be sure viewport is set to --window-size if --window-size is set (#3157)
Some checks failed
Build and push containers / metadata (push) Has been cancelled
Build and push containers / build-push-containers (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built 📦 package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
2025-04-29 17:23:37 +02:00
dgtlmoon
b2f0157982 Use cleaner logic for limiting elements to scan 2025-04-29 16:37:58 +02:00
dgtlmoon
c70706a27b Improved global ignore test (#3140)
Some checks failed
Build and push containers / metadata (push) Has been cancelled
Build and push containers / build-push-containers (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built 📦 package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
2025-04-29 11:20:21 +02:00
silversub
968c364999 Update docker-compose.yml (#3149)
Co-authored-by: silversub <silversub@gmail.com>
2025-04-29 11:20:00 +02:00
dgtlmoon
031cb76b7d Small fix for xpath element scraper (#3145)
Some checks failed
Build and push containers / metadata (push) Has been cancelled
Build and push containers / build-push-containers (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built 📦 package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
2025-04-25 17:58:04 +02:00
6 changed files with 69 additions and 29 deletions

View File

@@ -147,7 +147,7 @@ class fetcher(Fetcher):
is_binary,
empty_pages_are_a_change
):
import re
self.delete_browser_steps_screenshots()
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
@@ -172,6 +172,17 @@ class fetcher(Fetcher):
# headless - ask a new page
self.page = (pages := await browser.pages) and len(pages) or await browser.newPage()
if '--window-size' in self.browser_connection_url:
# Be sure the viewport is always the window-size, this is often not the same thing
match = re.search(r'--window-size=(\d+),(\d+)', self.browser_connection_url)
if match:
logger.debug(f"Setting viewport to same as --window-size in browser connection URL {int(match.group(1))},{int(match.group(2))}")
await self.page.setViewport({
"width": int(match.group(1)),
"height": int(match.group(2))
})
logger.debug(f"Puppeteer viewport size {self.page.viewport}")
try:
from pyppeteerstealth import inject_evasions_into_page
except ImportError:
@@ -218,7 +229,6 @@ class fetcher(Fetcher):
response = await self.page.goto(url, waitUntil="load")
if response is None:
await self.page.close()
await browser.close()

View File

@@ -125,6 +125,20 @@ async () => {
// so it's good to filter to just the 'above the fold' elements
// and it should be at least 100px from the top to ignore items in the toolbar, sometimes menu items like "Coming soon" exist
function elementIsInEyeBallRange(element) {
    // Returns true when the element's bottom edge (viewport offset plus the
    // current scroll position) lies strictly between 300px and 1300px,
    // false otherwise.
    //
    // outside the 'fold' or some weird text in the heading area
    // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
    // Note: there's also an automated test that places the 'out of stock' text fairly low down

    // Read the layout once and reuse it for both bounds checks.
    const bottomFromPageTop = element.getBoundingClientRect().bottom + window.scrollY;

    // Skip text that could be in the header area
    if (bottomFromPageTop <= 300) {
        return false;
    }
    // Skip text that could be much further down (like a list of "you may like" products that have 'sold out' in there)
    if (bottomFromPageTop >= 1300) {
        return false;
    }
    return true;
}
// @todo - if it's SVG or IMG, go into image diff mode
@@ -161,9 +175,7 @@ async () => {
for (let i = elementsToScan.length - 1; i >= 0; i--) {
const element = elementsToScan[i];
// outside the 'fold' or some weird text in the heading area
// .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
if (element.getBoundingClientRect().top + window.scrollY >= vh || element.getBoundingClientRect().top + window.scrollY <= 100) {
if (!elementIsInEyeBallRange(element)) {
continue
}
@@ -177,11 +189,11 @@ async () => {
} catch (e) {
console.warn('stock-not-in-stock.js scraper - handling element for gettext failed', e);
}
if (elementText.length) {
// try which ones could mean it's in stock
if (negateOutOfStockRegex.test(elementText) && !elementText.includes('(0 products)')) {
console.log(`Negating/overriding 'Out of Stock' back to "Possibly in stock" found "${elementText}"`)
element.style.border = "2px solid green"; // highlight the element that was detected as in stock
return 'Possibly in stock';
}
}
@@ -190,10 +202,8 @@ async () => {
// OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK
for (let i = elementsToScan.length - 1; i >= 0; i--) {
const element = elementsToScan[i];
// outside the 'fold' or some weird text in the heading area
// .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
// Note: theres also an automated test that places the 'out of stock' text fairly low down
if (element.getBoundingClientRect().top + window.scrollY >= vh + 250 || element.getBoundingClientRect().top + window.scrollY <= 100) {
if (!elementIsInEyeBallRange(element)) {
continue
}
elementText = "";
@@ -208,6 +218,7 @@ async () => {
for (const outOfStockText of outOfStockTexts) {
if (elementText.includes(outOfStockText)) {
console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}" - offset top ${element.getBoundingClientRect().top}, page height is ${vh}`)
element.style.border = "2px solid red"; // highlight the element that was detected as out of stock
return outOfStockText; // item is out of stock
}
}

View File

@@ -202,7 +202,6 @@ async (options) => {
// Foreach filter, go and find it on the page and add it to the results so we can visualise it again
for (const f of include_filters) {
bbox = false;
q = false;
if (!f.length) {
console.log("xpath_element_scraper: Empty filter, skipping");
@@ -255,7 +254,7 @@ async (options) => {
console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y)
} catch (e) {
console.log(e)
console.log("xpath_element_scraper: error looking up q.ownerElement")
console.log("xpath_element_scraper: error looking up node.ownerElement")
}
}

View File

@@ -14,6 +14,8 @@ from changedetectionio.notification import (
def set_original_response():
test_return_data = """<html>
<body>
<section id=header style="padding: 50px; height: 350px">This is the header which should be ignored always - <span>add to cart</span></section>
<!-- stock-not-in-stock.js will ignore text in the first 300px, see elementIsInEyeBallRange(), sometimes "add to cart" and other junk is here -->
Some initial text<br>
<p>Which is across multiple lines</p>
<br>
@@ -52,8 +54,6 @@ def test_restock_detection(client, live_server, measure_memory_usage):
set_original_response()
#assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test"
time.sleep(1)
live_server_setup(live_server)
#####################
notification_url = url_for('test_notification_endpoint', _external=True).replace('http://localhost', 'http://changedet').replace('http', 'json')
@@ -84,7 +84,8 @@ def test_restock_detection(client, live_server, measure_memory_usage):
# Is it correctly shown as NOT in stock?
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'not-in-stock' in res.data
assert b'processor-restock_diff' in res.data # Should have saved in restock mode
assert b'not-in-stock' in res.data # should be out of stock
# Is it correctly shown as in stock
set_back_in_stock_response()

View File

@@ -32,13 +32,14 @@ def test_strip_text_func():
stripped_content = html_tools.strip_ignore_text(test_content, ignore)
assert stripped_content == "Some initial text\n\nWhich is across multiple lines\n\n\n\nSo let's see what happens."
def set_original_ignore_response():
test_return_data = """<html>
def set_original_ignore_response(ver_stamp="123"):
test_return_data = f"""<html>
<body>
Some initial text<br>
<p>Which is across multiple lines</p>
<br>
So let's see what happens. <br>
<link href="https://www.somesite/wp-content/themes/cooltheme/style2.css?v={ver_stamp}" rel="stylesheet"/>
</body>
</html>
@@ -48,13 +49,14 @@ def set_original_ignore_response():
f.write(test_return_data)
def set_modified_original_ignore_response():
test_return_data = """<html>
def set_modified_original_ignore_response(ver_stamp="123"):
test_return_data = f"""<html>
<body>
Some NEW nice initial text<br>
<p>Which is across multiple lines</p>
<br>
So let's see what happens. <br>
<link href="https://www.somesite/wp-content/themes/cooltheme/style2.css?v={ver_stamp}" rel="stylesheet"/>
<p>new ignore stuff</p>
<p>blah</p>
</body>
@@ -67,14 +69,15 @@ def set_modified_original_ignore_response():
# Is the same but includes ZZZZZ, 'ZZZZZ' is the last line in ignore_text
def set_modified_ignore_response():
test_return_data = """<html>
def set_modified_ignore_response(ver_stamp="123"):
test_return_data = f"""<html>
<body>
Some initial text<br>
<p>Which is across multiple lines</p>
<P>ZZZZz</P>
<br>
So let's see what happens. <br>
<link href="https://www.somesite/wp-content/themes/cooltheme/style2.css?v={ver_stamp}" rel="stylesheet"/>
</body>
</html>
@@ -165,9 +168,9 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usa
assert b'Deleted' in res.data
# When adding some ignore text, it should not trigger a change, even if something else on that line changes
def test_check_global_ignore_text_functionality(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
ignore_text = "XXXXX\r\nYYYYY\r\nZZZZZ"
def _run_test_global_ignore(client, as_source=False, extra_ignore=""):
ignore_text = "XXXXX\r\nYYYYY\r\nZZZZZ\r\n"+extra_ignore
set_original_ignore_response()
# Goto the settings page, add our ignore text
@@ -186,6 +189,10 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
if as_source:
# Switch to source mode so we can test that too!
test_url = "source:"+test_url
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
@@ -203,12 +210,15 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem
follow_redirects=True
)
assert b"Updated watch." in res.data
wait_for_all_checks(client)
# Check it saved
res = client.get(
url_for("settings.settings_page"),
)
assert bytes(ignore_text.encode('utf-8')) in res.data
for i in ignore_text.splitlines():
assert bytes(i.encode('utf-8')) in res.data
# Trigger a check
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
@@ -221,7 +231,8 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem
# Make a change which includes the ignore text, it should be ignored and no 'change' triggered
# It adds text with "ZZZZzzzz" and "ZZZZ" is in the ignore list
set_modified_ignore_response()
# And tweaks the ver_stamp which should be picked up by global regex ignore
set_modified_ignore_response(ver_stamp=time.time())
# Trigger a check
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
@@ -243,3 +254,11 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
def test_check_global_ignore_text_functionality(client, live_server):
    """Run the shared global ignore-text scenario with as_source disabled.

    All steps and assertions live in the _run_test_global_ignore helper.
    """
    #live_server_setup(live_server)
    _run_test_global_ignore(client, as_source=False)
def test_check_global_ignore_text_functionality_as_source(client, live_server):
    """Run the shared global ignore-text scenario with as_source enabled.

    An extra regex-style ignore rule is passed to the helper as extra_ignore.
    All steps and assertions live in the _run_test_global_ignore helper.
    """
    #live_server_setup(live_server)
    # Raw string: the pattern contains backslash sequences that are not valid
    # Python string escapes, so a plain literal triggers an invalid-escape
    # warning on newer interpreters. The runtime value is unchanged.
    _run_test_global_ignore(client, as_source=True, extra_ignore=r'/\?v=\d/')

View File

@@ -72,7 +72,7 @@ services:
# Comment out ports: when using behind a reverse proxy , enable networks: etc.
ports:
- 5000:5000
- 127.0.0.1:5000:5000
restart: unless-stopped
# Used for fetching pages via WebDriver+Chrome where you need Javascript support.
@@ -82,7 +82,7 @@ services:
# If WEBDRIVER or PLAYWRIGHT are enabled, changedetection container depends on that
# and must wait before starting (substitute "browser-chrome" with "playwright-chrome" if last one is used)
# depends_on:
# sockpuppetbrowser:
# browser-sockpuppet-chrome:
# condition: service_started