tweaks

WIP
tweaks
2026-02-03 21:06:00 +00:00 · 2026-02-02 22:27:31 +01:00 · 2026-02-02 22:15:43 +01:00 · 2026-02-02 22:01:52 +01:00 · 2026-02-02 20:24:33 +01:00 · 2026-02-02 20:20:07 +01:00
5 changed files with 73 additions and 111 deletions
--- a/changedetectionio/api/Watch.py
+++ b/changedetectionio/api/Watch.py
@@ -480,16 +480,6 @@ class CreateWatch(Resource):
 #            worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
            return {'uuid': new_uuid}, 201
        else:
-            # Check if it was a limit issue
-            page_watch_limit = os.getenv('PAGE_WATCH_LIMIT')
-            if page_watch_limit:
-                try:
-                    page_watch_limit = int(page_watch_limit)
-                    current_watch_count = len(self.datastore.data['watching'])
-                    if current_watch_count >= page_watch_limit:
-                        return f"Watch limit reached ({current_watch_count}/{page_watch_limit} watches). Cannot add more watches.", 429
-                except ValueError:
-                    pass
            return "Invalid or unsupported URL", 400

    @auth.check_token
--- a/changedetectionio/content_fetchers/playwright.py
+++ b/changedetectionio/content_fetchers/playwright.py
@@ -1,4 +1,3 @@
-import asyncio
 import gc
 import json
 import os
@@ -350,7 +349,12 @@ class fetcher(Fetcher):

            if self.status_code != 200 and not ignore_status_codes:
                screenshot = await capture_full_page_async(self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
-                # Finally block will handle cleanup
+                # Cleanup before raising to prevent memory leak
+                await self.page.close()
+                await context.close()
+                await browser.close()
+                # Force garbage collection to release Playwright resources immediately
+                gc.collect()
                raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)

            if not empty_pages_are_a_change and len((await self.page.content()).strip()) == 0:
@@ -366,7 +370,12 @@ class fetcher(Fetcher):
                    try:
                        await self.iterate_browser_steps(start_url=url)
                    except BrowserStepsStepException:
-                        # Finally block will handle cleanup
+                        try:
+                            await context.close()
+                            await browser.close()
+                        except Exception as e:
+                            # Fine, could be messy situation
+                            pass
                        raise

                    await self.page.wait_for_timeout(extra_wait * 1000)
@@ -415,40 +424,35 @@ class fetcher(Fetcher):
                raise ScreenshotUnavailable(url=url, status_code=self.status_code)

            finally:
-                # Clean up resources properly with timeouts to prevent hanging
+                # Request garbage collection one more time before closing
                try:
-                    if hasattr(self, 'page') and self.page:
-                        await self.page.request_gc()
-                        await asyncio.wait_for(self.page.close(), timeout=5.0)
-                        logger.debug(f"Successfully closed page for {url}")
-                except asyncio.TimeoutError:
-                    logger.warning(f"Timed out closing page for {url} (5s)")
-                except Exception as e:
-                    logger.warning(f"Error closing page for {url}: {e}")
-                finally:
-                    self.page = None
+                    await self.page.request_gc()
+                except:
+                    pass
+                
+                # Clean up resources properly
+                try:
+                    await self.page.request_gc()
+                except:
+                    pass

                try:
-                    if context:
-                        await asyncio.wait_for(context.close(), timeout=5.0)
-                        logger.debug(f"Successfully closed context for {url}")
-                except asyncio.TimeoutError:
-                    logger.warning(f"Timed out closing context for {url} (5s)")
-                except Exception as e:
-                    logger.warning(f"Error closing context for {url}: {e}")
-                finally:
-                    context = None
+                    await self.page.close()
+                except:
+                    pass
+                self.page = None

                try:
-                    if browser:
-                        await asyncio.wait_for(browser.close(), timeout=5.0)
-                        logger.debug(f"Successfully closed browser connection for {url}")
-                except asyncio.TimeoutError:
-                    logger.warning(f"Timed out closing browser connection for {url} (5s)")
-                except Exception as e:
-                    logger.warning(f"Error closing browser for {url}: {e}")
-                finally:
-                    browser = None
+                    await context.close()
+                except:
+                    pass
+                context = None
+
+                try:
+                    await browser.close()
+                except:
+                    pass
+                browser = None

                # Force Python GC to release Playwright resources immediately
                # Playwright objects can have circular references that delay cleanup
--- a/changedetectionio/content_fetchers/puppeteer.py
+++ b/changedetectionio/content_fetchers/puppeteer.py
@@ -1,5 +1,4 @@
 import asyncio
-import gc
 import json
 import os
 import websockets.exceptions
@@ -222,36 +221,19 @@ class fetcher(Fetcher):
                self.browser_connection_url += f"{r}--proxy-server={proxy_url}"

    async def quit(self, watch=None):
-        watch_uuid = watch.get('uuid') if watch else 'unknown'
-
-        # Close page
        try:
-            if hasattr(self, 'page') and self.page:
-                await asyncio.wait_for(self.page.close(), timeout=5.0)
-                logger.debug(f"[{watch_uuid}] Page closed successfully")
-        except asyncio.TimeoutError:
-            logger.warning(f"[{watch_uuid}] Timed out closing page (5s)")
+            await self.page.close()
+            del self.page
        except Exception as e:
-            logger.warning(f"[{watch_uuid}] Error closing page: {e}")
-        finally:
-            self.page = None
+            pass

-        # Close browser connection
        try:
-            if hasattr(self, 'browser') and self.browser:
-                await asyncio.wait_for(self.browser.close(), timeout=5.0)
-                logger.debug(f"[{watch_uuid}] Browser closed successfully")
-        except asyncio.TimeoutError:
-            logger.warning(f"[{watch_uuid}] Timed out closing browser (5s)")
+            await self.browser.close()
+            del self.browser
        except Exception as e:
-            logger.warning(f"[{watch_uuid}] Error closing browser: {e}")
-        finally:
-            self.browser = None
+            pass

-        logger.info(f"[{watch_uuid}] Cleanup puppeteer complete")
-
-        # Force garbage collection to release resources
-        gc.collect()
+        logger.info("Cleanup puppeteer complete.")

    async def fetch_page(self,
                         current_include_filters,
@@ -281,11 +263,9 @@ class fetcher(Fetcher):
        # Connect directly using the specified browser_ws_endpoint
        # @todo timeout
        try:
-            logger.debug(f"[{watch_uuid}] Connecting to browser at {self.browser_connection_url}")
            self.browser = await pyppeteer_instance.connect(browserWSEndpoint=self.browser_connection_url,
                                                            ignoreHTTPSErrors=True
                                                            )
-            logger.debug(f"[{watch_uuid}] Browser connected successfully")
        except websockets.exceptions.InvalidStatusCode as e:
            raise BrowserConnectError(msg=f"Error while trying to connect the browser, Code {e.status_code} (check your access, whitelist IP, password etc)")
        except websockets.exceptions.InvalidURI:
@@ -294,18 +274,7 @@ class fetcher(Fetcher):
            raise BrowserConnectError(msg=f"Error connecting to the browser - Exception '{str(e)}'")

        # more reliable is to just request a new page
-        try:
-            logger.debug(f"[{watch_uuid}] Creating new page")
-            self.page = await self.browser.newPage()
-            logger.debug(f"[{watch_uuid}] Page created successfully")
-        except Exception as e:
-            logger.error(f"[{watch_uuid}] Failed to create new page: {e}")
-            # Browser is connected but page creation failed - must cleanup browser
-            try:
-                await asyncio.wait_for(self.browser.close(), timeout=3.0)
-            except Exception as cleanup_error:
-                logger.error(f"[{watch_uuid}] Failed to cleanup browser after page creation failure: {cleanup_error}")
-            raise
+        self.page = await self.browser.newPage()
        
        # Add console handler to capture console.log from favicon fetcher
        #self.page.on('console', lambda msg: logger.debug(f"Browser console [{msg.type}]: {msg.text}"))
@@ -374,12 +343,6 @@ class fetcher(Fetcher):
            w = extra_wait - 2 if extra_wait > 4 else 2
            logger.debug(f"Waiting {w} seconds before calling Page.stopLoading...")
            await asyncio.sleep(w)
-
-            # Check if page still exists (might have been closed due to error during sleep)
-            if not self.page or not hasattr(self.page, '_client'):
-                logger.debug("Page already closed, skipping stopLoading")
-                return
-
            logger.debug("Issuing stopLoading command...")
            await self.page._client.send('Page.stopLoading')
            logger.debug("stopLoading command sent!")
@@ -405,9 +368,7 @@ class fetcher(Fetcher):
            asyncio.create_task(handle_frame_navigation())
            response = await self.page.goto(url, timeout=0)
            await asyncio.sleep(1 + extra_wait)
-            # Check if page still exists before sending command
-            if self.page and hasattr(self.page, '_client'):
-                await self.page._client.send('Page.stopLoading')
+            await self.page._client.send('Page.stopLoading')

            if response:
                break
@@ -476,9 +437,15 @@ class fetcher(Fetcher):
        logger.debug(f"Screenshot format {self.screenshot_format}")
        self.screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)

-        # Force garbage collection - pyppeteer base64 decode creates temporary buffers
+        # Force aggressive memory cleanup - pyppeteer base64 decode creates temporary buffers
        import gc
        gc.collect()
+        # Release C-level memory from base64 decode back to OS
+        try:
+            import ctypes
+            ctypes.CDLL('libc.so.6').malloc_trim(0)
+        except Exception:
+            pass
        self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
            "visualselector_xpath_selectors": visualselector_xpath_selectors,
            "max_height": MAX_TOTAL_HEIGHT
--- a/changedetectionio/store/__init__.py
+++ b/changedetectionio/store/__init__.py
@@ -607,19 +607,6 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):

            return None

-        # Check PAGE_WATCH_LIMIT if set
-        page_watch_limit = os.getenv('PAGE_WATCH_LIMIT')
-        if page_watch_limit:
-            try:
-                page_watch_limit = int(page_watch_limit)
-                current_watch_count = len(self.__data['watching'])
-                if current_watch_count >= page_watch_limit:
-                    logger.error(f"Watch limit reached: {current_watch_count}/{page_watch_limit} watches. Cannot add {url}")
-                    flash(gettext("Watch limit reached ({}/{} watches). Cannot add more watches.").format(current_watch_count, page_watch_limit), 'error')
-                    return None
-            except ValueError:
-                logger.warning(f"Invalid PAGE_WATCH_LIMIT value: {page_watch_limit}, ignoring limit check")
-
        if tag and type(tag) == str:
            # Then it's probably a string of the actual tag by name, split and add it
            for t in tag.split(','):
--- a/changedetectionio/worker.py
+++ b/changedetectionio/worker.py
@@ -475,9 +475,14 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
                    del update_handler
                    update_handler = None

-                # Force garbage collection
+                # Force aggressive memory cleanup after clearing
                import gc
                gc.collect()
+                try:
+                    import ctypes
+                    ctypes.CDLL('libc.so.6').malloc_trim(0)
+                except Exception:
+                    pass

        except Exception as e:
            logger.error(f"Worker {worker_id} unexpected error processing {uuid}: {e}")
@@ -490,7 +495,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
        finally:
            # Always cleanup - this runs whether there was an exception or not
            if uuid:
-                # Call quit() as backup (Puppeteer/Playwright have internal cleanup, but this acts as safety net)
                try:
                    if update_handler and hasattr(update_handler, 'fetcher') and update_handler.fetcher:
                        await update_handler.fetcher.quit(watch=watch)
@@ -499,25 +503,35 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
                try:
                    # Release UUID from processing (thread-safe)
                    worker_pool.release_uuid_from_processing(uuid, worker_id=worker_id)
-
+                    
                    # Send completion signal
                    if watch:
+                        #logger.info(f"Worker {worker_id} sending completion signal for UUID {watch['uuid']}")
                        watch_check_update.send(watch_uuid=watch['uuid'])

-                    # Clean up all memory references BEFORE garbage collection
+                    # Explicitly clean up update_handler and all its references
                    if update_handler:
+                        # Clear fetcher content using the proper method
                        if hasattr(update_handler, 'fetcher') and update_handler.fetcher:
                            update_handler.fetcher.clear_content()
+
+                        # Clear processor references
                        if hasattr(update_handler, 'content_processor'):
                            update_handler.content_processor = None
-                        del update_handler
+
                        update_handler = None

-                    # Clear large content variables
+                    # Clear local contents variable if it still exists
                    if 'contents' in locals():
                        del contents

-                    # Force garbage collection after all references are cleared
+                    # Note: We don't set watch = None here because:
+                    # 1. watch is just a local reference to datastore.data['watching'][uuid]
+                    # 2. Setting it to None doesn't affect the datastore
+                    # 3. GC can't collect the object anyway (still referenced by datastore)
+                    # 4. It would just cause confusion
+
+                    # Force garbage collection after cleanup
                    import gc
                    gc.collect()
Author	SHA1	Message	Date
dgtlmoon	c7dc25bdfc	tweaks Some checks are pending Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Waiting to run Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Blocked by required conditions Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Blocked by required conditions Details ChangeDetection.io App Test / lint-code (push) Waiting to run Details ChangeDetection.io App Test / test-application-3-10 (push) Blocked by required conditions Details ChangeDetection.io App Test / test-application-3-11 (push) Blocked by required conditions Details ChangeDetection.io App Test / test-application-3-12 (push) Blocked by required conditions Details ChangeDetection.io App Test / test-application-3-13 (push) Blocked by required conditions Details	2026-02-02 22:27:31 +01:00
dgtlmoon	ca85310fb0	WIP	2026-02-02 22:15:43 +01:00
dgtlmoon	6907bfab1e	tweaks	2026-02-02 22:01:52 +01:00
dgtlmoon	65e6b461cf	Tweaks Some checks failed Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/amd64 (alpine) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm64 (alpine) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/amd64 (main) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm/v7 (main) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm/v8 (main) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm64 (main) (push) Has been cancelled Details	2026-02-02 20:24:33 +01:00
dgtlmoon	d96ddc0f23	Tweaks	2026-02-02 20:20:07 +01:00
dgtlmoon	e09c2813b1	Lower workers for testing	2026-02-02 20:04:17 +01:00
dgtlmoon	1f3c0995e5	test speedups	2026-02-02 19:51:04 +01:00
dgtlmoon	d420bda7e4	tweaks	2026-02-02 18:55:59 +01:00
dgtlmoon	f6a1b6d808	Timing tune	2026-02-02 18:49:54 +01:00
dgtlmoon	6f12412396	Queue changes	2026-02-02 18:37:07 +01:00
dgtlmoon	ff2ead88dd	test tweak	2026-02-02 18:19:27 +01:00
dgtlmoon	c38e3df4ee	Bump ignore	2026-02-02 18:10:55 +01:00
dgtlmoon	899e21a018	Queue timing fixes	2026-02-02 18:10:47 +01:00
dgtlmoon	aea7fc6f0a	test cleanup	2026-02-02 15:09:05 +01:00
dgtlmoon	d6d4960762	test tweak	2026-02-02 15:08:22 +01:00
dgtlmoon	72073bfc5e	include cleanup	2026-02-02 15:04:10 +01:00
dgtlmoon	8c809872e8	GitHub build - attempt to cache container build better	2026-02-02 14:58:19 +01:00
dgtlmoon	081d803977	test fix	2026-02-02 14:56:34 +01:00
dgtlmoon	61826bbf94	WIP	2026-02-02 14:47:14 +01:00
dgtlmoon	5fc920db5d	WIP Some checks failed Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details	2026-02-02 13:02:20 +01:00
dgtlmoon	68fb5cf898	test tweak	2026-02-02 12:59:21 +01:00
dgtlmoon	5b153ca25d	Revert test changes	2026-02-02 12:51:39 +01:00
dgtlmoon	f166e96466	Test fix	2026-02-02 11:16:16 +01:00
dgtlmoon	b7eaeb4ae4	Test fixes	2026-02-02 11:06:11 +01:00
dgtlmoon	ef310e4a67	test tweaks Some checks failed Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details	2026-02-01 18:26:33 +01:00
dgtlmoon	cf32bf5f47	test improvements	2026-02-01 18:18:22 +01:00
dgtlmoon	424e4ec1aa	Add test for worker active count	2026-02-01 12:27:19 +01:00
dgtlmoon	c1dca306ad	Refactor queue handling, add tests	2026-02-01 12:21:39 +01:00
dgtlmoon	e219e8cada	Janus queue worker not needed, improves multiple workers	2026-02-01 10:57:04 +01:00