Compare commits

..

29 Commits

Author SHA1 Message Date
dgtlmoon
c7dc25bdfc tweaks
Some checks are pending
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Waiting to run
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Blocked by required conditions
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Blocked by required conditions
ChangeDetection.io App Test / lint-code (push) Waiting to run
ChangeDetection.io App Test / test-application-3-10 (push) Blocked by required conditions
ChangeDetection.io App Test / test-application-3-11 (push) Blocked by required conditions
ChangeDetection.io App Test / test-application-3-12 (push) Blocked by required conditions
ChangeDetection.io App Test / test-application-3-13 (push) Blocked by required conditions
2026-02-02 22:27:31 +01:00
dgtlmoon
ca85310fb0 WIP 2026-02-02 22:15:43 +01:00
dgtlmoon
6907bfab1e tweaks 2026-02-02 22:01:52 +01:00
dgtlmoon
65e6b461cf Tweaks
Some checks failed
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/amd64 (alpine) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/arm64 (alpine) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/amd64 (main) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/arm/v7 (main) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/arm/v8 (main) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/arm64 (main) (push) Has been cancelled
2026-02-02 20:24:33 +01:00
dgtlmoon
d96ddc0f23 Tweaks 2026-02-02 20:20:07 +01:00
dgtlmoon
e09c2813b1 Lower workers for testing 2026-02-02 20:04:17 +01:00
dgtlmoon
1f3c0995e5 test speeupds 2026-02-02 19:51:04 +01:00
dgtlmoon
d420bda7e4 tweaks 2026-02-02 18:55:59 +01:00
dgtlmoon
f6a1b6d808 Timing tune 2026-02-02 18:49:54 +01:00
dgtlmoon
6f12412396 Queue changes 2026-02-02 18:37:07 +01:00
dgtlmoon
ff2ead88dd test tweak 2026-02-02 18:19:27 +01:00
dgtlmoon
c38e3df4ee Bump ignore 2026-02-02 18:10:55 +01:00
dgtlmoon
899e21a018 Queue timing fixes 2026-02-02 18:10:47 +01:00
dgtlmoon
aea7fc6f0a test cleanup 2026-02-02 15:09:05 +01:00
dgtlmoon
d6d4960762 test tweak 2026-02-02 15:08:22 +01:00
dgtlmoon
72073bfc5e include cleanup 2026-02-02 15:04:10 +01:00
dgtlmoon
8c809872e8 GitHub build - attempt to cache container build better 2026-02-02 14:58:19 +01:00
dgtlmoon
081d803977 test fix 2026-02-02 14:56:34 +01:00
dgtlmoon
61826bbf94 WIP 2026-02-02 14:47:14 +01:00
dgtlmoon
5fc920db5d WIP
Some checks failed
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
2026-02-02 13:02:20 +01:00
dgtlmoon
68fb5cf898 test tweak 2026-02-02 12:59:21 +01:00
dgtlmoon
5b153ca25d Revert test changes 2026-02-02 12:51:39 +01:00
dgtlmoon
f166e96466 Test fix 2026-02-02 11:16:16 +01:00
dgtlmoon
b7eaeb4ae4 Test fixes 2026-02-02 11:06:11 +01:00
dgtlmoon
ef310e4a67 test tweaks
Some checks failed
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
2026-02-01 18:26:33 +01:00
dgtlmoon
cf32bf5f47 test improvements 2026-02-01 18:18:22 +01:00
dgtlmoon
424e4ec1aa Add test for worker active count 2026-02-01 12:27:19 +01:00
dgtlmoon
c1dca306ad Refactor queue handling, add tests 2026-02-01 12:21:39 +01:00
dgtlmoon
e219e8cada Janus queue worker not needed, improves multiple workers 2026-02-01 10:57:04 +01:00
5 changed files with 73 additions and 111 deletions

View File

@@ -480,16 +480,6 @@ class CreateWatch(Resource):
# worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
return {'uuid': new_uuid}, 201
else:
# Check if it was a limit issue
page_watch_limit = os.getenv('PAGE_WATCH_LIMIT')
if page_watch_limit:
try:
page_watch_limit = int(page_watch_limit)
current_watch_count = len(self.datastore.data['watching'])
if current_watch_count >= page_watch_limit:
return f"Watch limit reached ({current_watch_count}/{page_watch_limit} watches). Cannot add more watches.", 429
except ValueError:
pass
return "Invalid or unsupported URL", 400
@auth.check_token

View File

@@ -1,4 +1,3 @@
import asyncio
import gc
import json
import os
@@ -350,7 +349,12 @@ class fetcher(Fetcher):
if self.status_code != 200 and not ignore_status_codes:
screenshot = await capture_full_page_async(self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
# Finally block will handle cleanup
# Cleanup before raising to prevent memory leak
await self.page.close()
await context.close()
await browser.close()
# Force garbage collection to release Playwright resources immediately
gc.collect()
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
if not empty_pages_are_a_change and len((await self.page.content()).strip()) == 0:
@@ -366,7 +370,12 @@ class fetcher(Fetcher):
try:
await self.iterate_browser_steps(start_url=url)
except BrowserStepsStepException:
# Finally block will handle cleanup
try:
await context.close()
await browser.close()
except Exception as e:
# Fine, could be messy situation
pass
raise
await self.page.wait_for_timeout(extra_wait * 1000)
@@ -415,40 +424,35 @@ class fetcher(Fetcher):
raise ScreenshotUnavailable(url=url, status_code=self.status_code)
finally:
# Clean up resources properly with timeouts to prevent hanging
# Request garbage collection one more time before closing
try:
if hasattr(self, 'page') and self.page:
await self.page.request_gc()
await asyncio.wait_for(self.page.close(), timeout=5.0)
logger.debug(f"Successfully closed page for {url}")
except asyncio.TimeoutError:
logger.warning(f"Timed out closing page for {url} (5s)")
except Exception as e:
logger.warning(f"Error closing page for {url}: {e}")
finally:
self.page = None
await self.page.request_gc()
except:
pass
# Clean up resources properly
try:
await self.page.request_gc()
except:
pass
try:
if context:
await asyncio.wait_for(context.close(), timeout=5.0)
logger.debug(f"Successfully closed context for {url}")
except asyncio.TimeoutError:
logger.warning(f"Timed out closing context for {url} (5s)")
except Exception as e:
logger.warning(f"Error closing context for {url}: {e}")
finally:
context = None
await self.page.close()
except:
pass
self.page = None
try:
if browser:
await asyncio.wait_for(browser.close(), timeout=5.0)
logger.debug(f"Successfully closed browser connection for {url}")
except asyncio.TimeoutError:
logger.warning(f"Timed out closing browser connection for {url} (5s)")
except Exception as e:
logger.warning(f"Error closing browser for {url}: {e}")
finally:
browser = None
await context.close()
except:
pass
context = None
try:
await browser.close()
except:
pass
browser = None
# Force Python GC to release Playwright resources immediately
# Playwright objects can have circular references that delay cleanup

View File

@@ -1,5 +1,4 @@
import asyncio
import gc
import json
import os
import websockets.exceptions
@@ -222,36 +221,19 @@ class fetcher(Fetcher):
self.browser_connection_url += f"{r}--proxy-server={proxy_url}"
async def quit(self, watch=None):
watch_uuid = watch.get('uuid') if watch else 'unknown'
# Close page
try:
if hasattr(self, 'page') and self.page:
await asyncio.wait_for(self.page.close(), timeout=5.0)
logger.debug(f"[{watch_uuid}] Page closed successfully")
except asyncio.TimeoutError:
logger.warning(f"[{watch_uuid}] Timed out closing page (5s)")
await self.page.close()
del self.page
except Exception as e:
logger.warning(f"[{watch_uuid}] Error closing page: {e}")
finally:
self.page = None
pass
# Close browser connection
try:
if hasattr(self, 'browser') and self.browser:
await asyncio.wait_for(self.browser.close(), timeout=5.0)
logger.debug(f"[{watch_uuid}] Browser closed successfully")
except asyncio.TimeoutError:
logger.warning(f"[{watch_uuid}] Timed out closing browser (5s)")
await self.browser.close()
del self.browser
except Exception as e:
logger.warning(f"[{watch_uuid}] Error closing browser: {e}")
finally:
self.browser = None
pass
logger.info(f"[{watch_uuid}] Cleanup puppeteer complete")
# Force garbage collection to release resources
gc.collect()
logger.info("Cleanup puppeteer complete.")
async def fetch_page(self,
current_include_filters,
@@ -281,11 +263,9 @@ class fetcher(Fetcher):
# Connect directly using the specified browser_ws_endpoint
# @todo timeout
try:
logger.debug(f"[{watch_uuid}] Connecting to browser at {self.browser_connection_url}")
self.browser = await pyppeteer_instance.connect(browserWSEndpoint=self.browser_connection_url,
ignoreHTTPSErrors=True
)
logger.debug(f"[{watch_uuid}] Browser connected successfully")
except websockets.exceptions.InvalidStatusCode as e:
raise BrowserConnectError(msg=f"Error while trying to connect the browser, Code {e.status_code} (check your access, whitelist IP, password etc)")
except websockets.exceptions.InvalidURI:
@@ -294,18 +274,7 @@ class fetcher(Fetcher):
raise BrowserConnectError(msg=f"Error connecting to the browser - Exception '{str(e)}'")
# more reliable is to just request a new page
try:
logger.debug(f"[{watch_uuid}] Creating new page")
self.page = await self.browser.newPage()
logger.debug(f"[{watch_uuid}] Page created successfully")
except Exception as e:
logger.error(f"[{watch_uuid}] Failed to create new page: {e}")
# Browser is connected but page creation failed - must cleanup browser
try:
await asyncio.wait_for(self.browser.close(), timeout=3.0)
except Exception as cleanup_error:
logger.error(f"[{watch_uuid}] Failed to cleanup browser after page creation failure: {cleanup_error}")
raise
self.page = await self.browser.newPage()
# Add console handler to capture console.log from favicon fetcher
#self.page.on('console', lambda msg: logger.debug(f"Browser console [{msg.type}]: {msg.text}"))
@@ -374,12 +343,6 @@ class fetcher(Fetcher):
w = extra_wait - 2 if extra_wait > 4 else 2
logger.debug(f"Waiting {w} seconds before calling Page.stopLoading...")
await asyncio.sleep(w)
# Check if page still exists (might have been closed due to error during sleep)
if not self.page or not hasattr(self.page, '_client'):
logger.debug("Page already closed, skipping stopLoading")
return
logger.debug("Issuing stopLoading command...")
await self.page._client.send('Page.stopLoading')
logger.debug("stopLoading command sent!")
@@ -405,9 +368,7 @@ class fetcher(Fetcher):
asyncio.create_task(handle_frame_navigation())
response = await self.page.goto(url, timeout=0)
await asyncio.sleep(1 + extra_wait)
# Check if page still exists before sending command
if self.page and hasattr(self.page, '_client'):
await self.page._client.send('Page.stopLoading')
await self.page._client.send('Page.stopLoading')
if response:
break
@@ -476,9 +437,15 @@ class fetcher(Fetcher):
logger.debug(f"Screenshot format {self.screenshot_format}")
self.screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
# Force garbage collection - pyppeteer base64 decode creates temporary buffers
# Force aggressive memory cleanup - pyppeteer base64 decode creates temporary buffers
import gc
gc.collect()
# Release C-level memory from base64 decode back to OS
try:
import ctypes
ctypes.CDLL('libc.so.6').malloc_trim(0)
except Exception:
pass
self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
"visualselector_xpath_selectors": visualselector_xpath_selectors,
"max_height": MAX_TOTAL_HEIGHT

View File

@@ -607,19 +607,6 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
return None
# Check PAGE_WATCH_LIMIT if set
page_watch_limit = os.getenv('PAGE_WATCH_LIMIT')
if page_watch_limit:
try:
page_watch_limit = int(page_watch_limit)
current_watch_count = len(self.__data['watching'])
if current_watch_count >= page_watch_limit:
logger.error(f"Watch limit reached: {current_watch_count}/{page_watch_limit} watches. Cannot add {url}")
flash(gettext("Watch limit reached ({}/{} watches). Cannot add more watches.").format(current_watch_count, page_watch_limit), 'error')
return None
except ValueError:
logger.warning(f"Invalid PAGE_WATCH_LIMIT value: {page_watch_limit}, ignoring limit check")
if tag and type(tag) == str:
# Then it's probably a string of the actual tag by name, split and add it
for t in tag.split(','):

View File

@@ -475,9 +475,14 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
del update_handler
update_handler = None
# Force garbage collection
# Force aggressive memory cleanup after clearing
import gc
gc.collect()
try:
import ctypes
ctypes.CDLL('libc.so.6').malloc_trim(0)
except Exception:
pass
except Exception as e:
logger.error(f"Worker {worker_id} unexpected error processing {uuid}: {e}")
@@ -490,7 +495,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
finally:
# Always cleanup - this runs whether there was an exception or not
if uuid:
# Call quit() as backup (Puppeteer/Playwright have internal cleanup, but this acts as safety net)
try:
if update_handler and hasattr(update_handler, 'fetcher') and update_handler.fetcher:
await update_handler.fetcher.quit(watch=watch)
@@ -499,25 +503,35 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
try:
# Release UUID from processing (thread-safe)
worker_pool.release_uuid_from_processing(uuid, worker_id=worker_id)
# Send completion signal
if watch:
#logger.info(f"Worker {worker_id} sending completion signal for UUID {watch['uuid']}")
watch_check_update.send(watch_uuid=watch['uuid'])
# Clean up all memory references BEFORE garbage collection
# Explicitly clean up update_handler and all its references
if update_handler:
# Clear fetcher content using the proper method
if hasattr(update_handler, 'fetcher') and update_handler.fetcher:
update_handler.fetcher.clear_content()
# Clear processor references
if hasattr(update_handler, 'content_processor'):
update_handler.content_processor = None
del update_handler
update_handler = None
# Clear large content variables
# Clear local contents variable if it still exists
if 'contents' in locals():
del contents
# Force garbage collection after all references are cleared
# Note: We don't set watch = None here because:
# 1. watch is just a local reference to datastore.data['watching'][uuid]
# 2. Setting it to None doesn't affect the datastore
# 3. GC can't collect the object anyway (still referenced by datastore)
# 4. It would just cause confusion
# Force garbage collection after cleanup
import gc
gc.collect()