This commit is contained in:
dgtlmoon
2025-12-17 11:43:48 +01:00
parent fbd22bfafe
commit 6986c6687c
8 changed files with 82 additions and 30 deletions

View File

@@ -89,9 +89,8 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
processor = watch.get('processor', 'text_json_diff')
# Init a new 'difference_detection_processor'
processor_module_name = f"changedetectionio.processors.{processor}.processor"
try:
processor_module = importlib.import_module(processor_module_name)
processor_module = importlib.import_module(f"changedetectionio.processors.{processor}.processor")
except ModuleNotFoundError as e:
print(f"Processor module '{processor}' not found.")
raise e

View File

@@ -51,6 +51,7 @@ class Fetcher():
favicon_blob = None
instock_data = None
instock_data_js = ""
screenshot_format = None
status_code = None
webdriver_js_execute_code = None
xpath_data = None
@@ -70,6 +71,11 @@ class Fetcher():
supports_screenshots = False # Can capture page screenshots
supports_xpath_element_data = False # Can extract xpath element positions/data for visual selector
def __init__(self, **kwargs):
    """Record the optional ``screenshot_format`` keyword on the instance.

    Only the ``screenshot_format`` key is inspected; every other keyword
    argument is ignored. When the key is absent nothing is assigned, so
    the attribute keeps whatever value it already had.
    """
    # Guard clause: nothing to do unless the caller supplied the key.
    if 'screenshot_format' not in kwargs:
        return
    self.screenshot_format = kwargs['screenshot_format']
@classmethod
def get_status_icon_data(cls):
"""Return data for status icon to display in the watch overview.

View File

@@ -9,7 +9,7 @@ from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, vi
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable
async def capture_full_page_async(page):
async def capture_full_page_async(page, screenshot_format='JPEG'):
import os
import time
from multiprocessing import Process, Pipe
@@ -35,6 +35,11 @@ async def capture_full_page_async(page):
await page.set_viewport_size({'width': page.viewport_size['width'], 'height': step_size})
# Capture screenshots in chunks up to the max total height
# Use PNG for better quality (no compression artifacts), JPEG for smaller size
screenshot_type = screenshot_format.lower() if screenshot_format else 'jpeg'
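# Quality is configurable for JPEG via SCREENSHOT_QUALITY; the 100 chosen for PNG is a placeholder only, since quality is passed to Playwright for jpeg alone (see below)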
screenshot_quality = 100 if screenshot_type == 'png' else int(os.getenv("SCREENSHOT_QUALITY", 72))
while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT):
# Only scroll if not at the top (y > 0)
if y > 0:
@@ -43,11 +48,15 @@ async def capture_full_page_async(page):
# Request GC only before screenshot (not 3x per chunk)
await page.request_gc()
screenshot_chunks.append(await page.screenshot(
type="jpeg",
full_page=False,
quality=int(os.getenv("SCREENSHOT_QUALITY", 72))
))
screenshot_kwargs = {
'type': screenshot_type,
'full_page': False
}
# Only pass quality parameter for jpeg (PNG doesn't support it in Playwright)
if screenshot_type == 'jpeg':
screenshot_kwargs['quality'] = screenshot_quality
screenshot_chunks.append(await page.screenshot(**screenshot_kwargs))
y += step_size
# Restore original viewport size
@@ -116,8 +125,8 @@ class fetcher(Fetcher):
'title': 'Using a Chrome browser'
}
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
super().__init__()
def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
super().__init__(**kwargs)
self.browser_type = os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').strip('"')
@@ -152,7 +161,7 @@ class fetcher(Fetcher):
async def screenshot_step(self, step_n=''):
super().screenshot_step(step_n=step_n)
screenshot = await capture_full_page_async(page=self.page)
screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format)
if self.browser_steps_screenshot_path is not None:
@@ -178,6 +187,7 @@ class fetcher(Fetcher):
request_body=None,
request_headers=None,
request_method=None,
screenshot_format=None,
timeout=None,
url=None,
watch_uuid=None,
@@ -272,7 +282,7 @@ class fetcher(Fetcher):
logger.error(f"Error fetching FavIcon info {str(e)}, continuing.")
if self.status_code != 200 and not ignore_status_codes:
screenshot = await capture_full_page_async(self.page)
screenshot = await capture_full_page_async(self.page, screenshot_format=self.screenshot_format)
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
if not empty_pages_are_a_change and len((await self.page.content()).strip()) == 0:
@@ -321,7 +331,7 @@ class fetcher(Fetcher):
# acceptable screenshot quality here
try:
# The actual screenshot - this is always base64 and needs decoding! horrible! huge CPU usage
self.screenshot = await capture_full_page_async(page=self.page)
self.screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format)
except Exception as e:
# It's likely the screenshot was too long/big and something crashed

View File

@@ -20,7 +20,7 @@ from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200
# Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
# acceptable screenshot quality here
async def capture_full_page(page):
async def capture_full_page(page, screenshot_format='JPEG'):
import os
import time
from multiprocessing import Process, Pipe
@@ -41,6 +41,10 @@ async def capture_full_page(page):
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
# acceptable screenshot quality here
# Use PNG for better quality (no compression artifacts), JPEG for smaller size
screenshot_type = screenshot_format.lower() if screenshot_format else 'jpeg'
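# Quality is configurable for JPEG via SCREENSHOT_QUALITY; the 100 chosen for PNG is a placeholder only, since quality is passed to Puppeteer for jpeg alone (see below)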
screenshot_quality = 100 if screenshot_type == 'png' else int(os.getenv("SCREENSHOT_QUALITY", 72))
step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD # Something that will not cause the GPU to overflow when taking the screenshot
screenshot_chunks = []
@@ -60,9 +64,15 @@ async def capture_full_page(page):
y
)
screenshot_chunks.append(await page.screenshot(type_='jpeg',
fullPage=False,
quality=int(os.getenv("SCREENSHOT_QUALITY", 72))))
screenshot_kwargs = {
'type_': screenshot_type,
'fullPage': False
}
# PNG doesn't support quality parameter in Puppeteer
if screenshot_type == 'jpeg':
screenshot_kwargs['quality'] = screenshot_quality
screenshot_chunks.append(await page.screenshot(**screenshot_kwargs))
y += step_size
await page.setViewport({'width': original_viewport['width'], 'height': original_viewport['height']})
@@ -112,8 +122,8 @@ class fetcher(Fetcher):
'title': 'Using a Chrome browser'
}
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
super().__init__()
def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
super().__init__(**kwargs)
if custom_browser_connection_url:
self.browser_connection_is_custom = True
@@ -167,6 +177,7 @@ class fetcher(Fetcher):
request_body,
request_headers,
request_method,
screenshot_format,
timeout,
url,
watch_uuid
@@ -316,7 +327,7 @@ class fetcher(Fetcher):
logger.error(f"Error fetching FavIcon info {str(e)}, continuing.")
if self.status_code != 200 and not ignore_status_codes:
screenshot = await capture_full_page(page=self.page)
screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format)
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
@@ -354,7 +365,7 @@ class fetcher(Fetcher):
self.content = await self.page.content
self.screenshot = await capture_full_page(page=self.page)
self.screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format)
# It's good to log here in the case that the browser crashes on shutting down but we still get the data we need
logger.success(f"Fetching '{url}' complete, closing page")
@@ -375,6 +386,7 @@ class fetcher(Fetcher):
request_body=None,
request_headers=None,
request_method=None,
screenshot_format=None,
timeout=None,
url=None,
watch_uuid=None,
@@ -394,6 +406,7 @@ class fetcher(Fetcher):
request_body=request_body,
request_headers=request_headers,
request_method=request_method,
screenshot_format=None,
timeout=timeout,
url=url,
watch_uuid=watch_uuid,

View File

@@ -12,8 +12,8 @@ from changedetectionio.content_fetchers.base import Fetcher
class fetcher(Fetcher):
fetcher_description = "Basic fast Plaintext/HTTP Client"
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
super().__init__()
def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
super().__init__(**kwargs)
self.proxy_override = proxy_override
# browser_connection_url is None because it's always 'launched locally'
@@ -135,6 +135,7 @@ class fetcher(Fetcher):
request_body=None,
request_headers=None,
request_method=None,
screenshot_format=None,
timeout=None,
url=None,
watch_uuid=None,

View File

@@ -28,8 +28,8 @@ class fetcher(Fetcher):
'title': 'Using a Chrome browser'
}
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
super().__init__()
def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
super().__init__(**kwargs)
from urllib.parse import urlparse
from selenium.webdriver.common.proxy import Proxy
@@ -69,6 +69,7 @@ class fetcher(Fetcher):
request_body=None,
request_headers=None,
request_method=None,
screenshot_format=None,
timeout=None,
url=None,
watch_uuid=None,
@@ -146,7 +147,21 @@ class fetcher(Fetcher):
time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
self.content = driver.page_source
self.headers = {}
self.screenshot = driver.get_screenshot_as_png()
# Selenium always captures as PNG, convert to JPEG if needed
screenshot_png = driver.get_screenshot_as_png()
# Convert to JPEG if requested (for smaller file size)
if self.screenshot_format and self.screenshot_format.upper() == 'JPEG':
from PIL import Image
import io
img = Image.open(io.BytesIO(screenshot_png))
jpeg_buffer = io.BytesIO()
img.save(jpeg_buffer, format='JPEG', quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
self.screenshot = jpeg_buffer.getvalue()
img.close()
else:
self.screenshot = screenshot_png
except Exception as e:
driver.quit()
raise e

View File

@@ -10,6 +10,9 @@ import os
import pkgutil
import re
SCREENSHOT_FORMAT_JPEG = 'JPEG'
SCREENSHOT_FORMAT_PNG = 'PNG'
class difference_detection_processor():
browser_steps = None
@@ -19,9 +22,9 @@ class difference_detection_processor():
watch = None
xpath_data = None
preferred_proxy = None
screenshot_format = SCREENSHOT_FORMAT_JPEG
def __init__(self, *args, datastore, watch_uuid, **kwargs):
super().__init__(*args, **kwargs)
def __init__(self, datastore, watch_uuid):
self.datastore = datastore
self.watch_uuid = watch_uuid
self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))
@@ -97,7 +100,8 @@ class difference_detection_processor():
# Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need.
# When browser_connection_url is None, the fetcher should work out the best defaults itself (OS env vars etc.)
self.fetcher = fetcher_obj(proxy_override=proxy_url,
custom_browser_connection_url=custom_browser_connection_url
custom_browser_connection_url=custom_browser_connection_url,
screenshot_format=self.screenshot_format
)
if self.watch.has_browser_steps:
@@ -159,6 +163,7 @@ class difference_detection_processor():
request_body=request_body,
request_headers=request_headers,
request_method=request_method,
screenshot_format = self.screenshot_format,
timeout=timeout,
url=url,
watch_uuid=self.watch_uuid,

View File

@@ -10,7 +10,7 @@ import hashlib
import os
import time
from loguru import logger
from changedetectionio.processors import difference_detection_processor
from changedetectionio.processors import difference_detection_processor, SCREENSHOT_FORMAT_PNG
from changedetectionio.processors.exceptions import ProcessorException
from . import DEFAULT_COMPARISON_METHOD, DEFAULT_COMPARISON_THRESHOLD_OPENCV, DEFAULT_COMPARISON_THRESHOLD_PIXELMATCH
@@ -21,6 +21,9 @@ description = 'Compares screenshots using fast algorithms (OpenCV or pixelmatch)
class perform_site_check(difference_detection_processor):
"""Fast screenshot comparison processor."""
# Override to use PNG format for better image comparison (JPEG compression creates noise)
screenshot_format = SCREENSHOT_FORMAT_PNG
def run_changedetection(self, watch):
"""
Perform screenshot comparison using OpenCV or pixelmatch.