mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2025-12-19 14:35:35 +00:00
WIP
This commit is contained in:
@@ -89,9 +89,8 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
|
||||
processor = watch.get('processor', 'text_json_diff')
|
||||
|
||||
# Init a new 'difference_detection_processor'
|
||||
processor_module_name = f"changedetectionio.processors.{processor}.processor"
|
||||
try:
|
||||
processor_module = importlib.import_module(processor_module_name)
|
||||
processor_module = importlib.import_module(f"changedetectionio.processors.{processor}.processor")
|
||||
except ModuleNotFoundError as e:
|
||||
print(f"Processor module '{processor}' not found.")
|
||||
raise e
|
||||
|
||||
@@ -51,6 +51,7 @@ class Fetcher():
|
||||
favicon_blob = None
|
||||
instock_data = None
|
||||
instock_data_js = ""
|
||||
screenshot_format = None
|
||||
status_code = None
|
||||
webdriver_js_execute_code = None
|
||||
xpath_data = None
|
||||
@@ -70,6 +71,11 @@ class Fetcher():
|
||||
supports_screenshots = False # Can capture page screenshots
|
||||
supports_xpath_element_data = False # Can extract xpath element positions/data for visual selector
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
if kwargs and 'screenshot_format' in kwargs:
|
||||
self.screenshot_format = kwargs.get('screenshot_format')
|
||||
|
||||
|
||||
@classmethod
|
||||
def get_status_icon_data(cls):
|
||||
"""Return data for status icon to display in the watch overview.
|
||||
|
||||
@@ -9,7 +9,7 @@ from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, vi
|
||||
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
|
||||
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable
|
||||
|
||||
async def capture_full_page_async(page):
|
||||
async def capture_full_page_async(page, screenshot_format='JPEG'):
|
||||
import os
|
||||
import time
|
||||
from multiprocessing import Process, Pipe
|
||||
@@ -35,6 +35,11 @@ async def capture_full_page_async(page):
|
||||
await page.set_viewport_size({'width': page.viewport_size['width'], 'height': step_size})
|
||||
|
||||
# Capture screenshots in chunks up to the max total height
|
||||
# Use PNG for better quality (no compression artifacts), JPEG for smaller size
|
||||
screenshot_type = screenshot_format.lower() if screenshot_format else 'jpeg'
|
||||
# Quality only applies to JPEG (configurable via SCREENSHOT_QUALITY); the 100 computed for PNG is a placeholder that is never passed to page.screenshot()
|
||||
screenshot_quality = 100 if screenshot_type == 'png' else int(os.getenv("SCREENSHOT_QUALITY", 72))
|
||||
|
||||
while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT):
|
||||
# Only scroll if not at the top (y > 0)
|
||||
if y > 0:
|
||||
@@ -43,11 +48,15 @@ async def capture_full_page_async(page):
|
||||
# Request GC only before screenshot (not 3x per chunk)
|
||||
await page.request_gc()
|
||||
|
||||
screenshot_chunks.append(await page.screenshot(
|
||||
type="jpeg",
|
||||
full_page=False,
|
||||
quality=int(os.getenv("SCREENSHOT_QUALITY", 72))
|
||||
))
|
||||
screenshot_kwargs = {
|
||||
'type': screenshot_type,
|
||||
'full_page': False
|
||||
}
|
||||
# Only pass quality parameter for jpeg (PNG doesn't support it in Playwright)
|
||||
if screenshot_type == 'jpeg':
|
||||
screenshot_kwargs['quality'] = screenshot_quality
|
||||
|
||||
screenshot_chunks.append(await page.screenshot(**screenshot_kwargs))
|
||||
y += step_size
|
||||
|
||||
# Restore original viewport size
|
||||
@@ -116,8 +125,8 @@ class fetcher(Fetcher):
|
||||
'title': 'Using a Chrome browser'
|
||||
}
|
||||
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
|
||||
super().__init__()
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
|
||||
self.browser_type = os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').strip('"')
|
||||
|
||||
@@ -152,7 +161,7 @@ class fetcher(Fetcher):
|
||||
|
||||
async def screenshot_step(self, step_n=''):
|
||||
super().screenshot_step(step_n=step_n)
|
||||
screenshot = await capture_full_page_async(page=self.page)
|
||||
screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format)
|
||||
|
||||
|
||||
if self.browser_steps_screenshot_path is not None:
|
||||
@@ -178,6 +187,7 @@ class fetcher(Fetcher):
|
||||
request_body=None,
|
||||
request_headers=None,
|
||||
request_method=None,
|
||||
screenshot_format=None,
|
||||
timeout=None,
|
||||
url=None,
|
||||
watch_uuid=None,
|
||||
@@ -272,7 +282,7 @@ class fetcher(Fetcher):
|
||||
logger.error(f"Error fetching FavIcon info {str(e)}, continuing.")
|
||||
|
||||
if self.status_code != 200 and not ignore_status_codes:
|
||||
screenshot = await capture_full_page_async(self.page)
|
||||
screenshot = await capture_full_page_async(self.page, screenshot_format=self.screenshot_format)
|
||||
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
|
||||
|
||||
if not empty_pages_are_a_change and len((await self.page.content()).strip()) == 0:
|
||||
@@ -321,7 +331,7 @@ class fetcher(Fetcher):
|
||||
# acceptable screenshot quality here
|
||||
try:
|
||||
# The actual screenshot - this is always base64 and needs decoding! horrible! huge CPU usage
|
||||
self.screenshot = await capture_full_page_async(page=self.page)
|
||||
self.screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format)
|
||||
|
||||
except Exception as e:
|
||||
# It's likely the screenshot was too long/big and something crashed
|
||||
|
||||
@@ -20,7 +20,7 @@ from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200
|
||||
# Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
|
||||
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
|
||||
# acceptable screenshot quality here
|
||||
async def capture_full_page(page):
|
||||
async def capture_full_page(page, screenshot_format='JPEG'):
|
||||
import os
|
||||
import time
|
||||
from multiprocessing import Process, Pipe
|
||||
@@ -41,6 +41,10 @@ async def capture_full_page(page):
|
||||
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
|
||||
# acceptable screenshot quality here
|
||||
|
||||
# Use PNG for better quality (no compression artifacts), JPEG for smaller size
|
||||
screenshot_type = screenshot_format.lower() if screenshot_format else 'jpeg'
|
||||
# Quality only applies to JPEG (configurable via SCREENSHOT_QUALITY); the 100 computed for PNG is a placeholder that is never passed to page.screenshot()
|
||||
screenshot_quality = 100 if screenshot_type == 'png' else int(os.getenv("SCREENSHOT_QUALITY", 72))
|
||||
|
||||
step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD # Something that will not cause the GPU to overflow when taking the screenshot
|
||||
screenshot_chunks = []
|
||||
@@ -60,9 +64,15 @@ async def capture_full_page(page):
|
||||
y
|
||||
)
|
||||
|
||||
screenshot_chunks.append(await page.screenshot(type_='jpeg',
|
||||
fullPage=False,
|
||||
quality=int(os.getenv("SCREENSHOT_QUALITY", 72))))
|
||||
screenshot_kwargs = {
|
||||
'type_': screenshot_type,
|
||||
'fullPage': False
|
||||
}
|
||||
# PNG doesn't support quality parameter in Puppeteer
|
||||
if screenshot_type == 'jpeg':
|
||||
screenshot_kwargs['quality'] = screenshot_quality
|
||||
|
||||
screenshot_chunks.append(await page.screenshot(**screenshot_kwargs))
|
||||
y += step_size
|
||||
|
||||
await page.setViewport({'width': original_viewport['width'], 'height': original_viewport['height']})
|
||||
@@ -112,8 +122,8 @@ class fetcher(Fetcher):
|
||||
'title': 'Using a Chrome browser'
|
||||
}
|
||||
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
|
||||
super().__init__()
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
|
||||
if custom_browser_connection_url:
|
||||
self.browser_connection_is_custom = True
|
||||
@@ -167,6 +177,7 @@ class fetcher(Fetcher):
|
||||
request_body,
|
||||
request_headers,
|
||||
request_method,
|
||||
screenshot_format,
|
||||
timeout,
|
||||
url,
|
||||
watch_uuid
|
||||
@@ -316,7 +327,7 @@ class fetcher(Fetcher):
|
||||
logger.error(f"Error fetching FavIcon info {str(e)}, continuing.")
|
||||
|
||||
if self.status_code != 200 and not ignore_status_codes:
|
||||
screenshot = await capture_full_page(page=self.page)
|
||||
screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format)
|
||||
|
||||
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
|
||||
|
||||
@@ -354,7 +365,7 @@ class fetcher(Fetcher):
|
||||
|
||||
self.content = await self.page.content
|
||||
|
||||
self.screenshot = await capture_full_page(page=self.page)
|
||||
self.screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format)
|
||||
|
||||
# It's good to log here in the case that the browser crashes on shutting down but we still get the data we need
|
||||
logger.success(f"Fetching '{url}' complete, closing page")
|
||||
@@ -375,6 +386,7 @@ class fetcher(Fetcher):
|
||||
request_body=None,
|
||||
request_headers=None,
|
||||
request_method=None,
|
||||
screenshot_format=None,
|
||||
timeout=None,
|
||||
url=None,
|
||||
watch_uuid=None,
|
||||
@@ -394,6 +406,7 @@ class fetcher(Fetcher):
|
||||
request_body=request_body,
|
||||
request_headers=request_headers,
|
||||
request_method=request_method,
|
||||
screenshot_format=None,
|
||||
timeout=timeout,
|
||||
url=url,
|
||||
watch_uuid=watch_uuid,
|
||||
|
||||
@@ -12,8 +12,8 @@ from changedetectionio.content_fetchers.base import Fetcher
|
||||
class fetcher(Fetcher):
|
||||
fetcher_description = "Basic fast Plaintext/HTTP Client"
|
||||
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
|
||||
super().__init__()
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
self.proxy_override = proxy_override
|
||||
# browser_connection_url is None because it's always 'launched locally'
|
||||
|
||||
@@ -135,6 +135,7 @@ class fetcher(Fetcher):
|
||||
request_body=None,
|
||||
request_headers=None,
|
||||
request_method=None,
|
||||
screenshot_format=None,
|
||||
timeout=None,
|
||||
url=None,
|
||||
watch_uuid=None,
|
||||
|
||||
@@ -28,8 +28,8 @@ class fetcher(Fetcher):
|
||||
'title': 'Using a Chrome browser'
|
||||
}
|
||||
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
|
||||
super().__init__()
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
from urllib.parse import urlparse
|
||||
from selenium.webdriver.common.proxy import Proxy
|
||||
|
||||
@@ -69,6 +69,7 @@ class fetcher(Fetcher):
|
||||
request_body=None,
|
||||
request_headers=None,
|
||||
request_method=None,
|
||||
screenshot_format=None,
|
||||
timeout=None,
|
||||
url=None,
|
||||
watch_uuid=None,
|
||||
@@ -146,7 +147,21 @@ class fetcher(Fetcher):
|
||||
time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
|
||||
self.content = driver.page_source
|
||||
self.headers = {}
|
||||
self.screenshot = driver.get_screenshot_as_png()
|
||||
|
||||
# Selenium always captures as PNG, convert to JPEG if needed
|
||||
screenshot_png = driver.get_screenshot_as_png()
|
||||
|
||||
# Convert to JPEG if requested (for smaller file size)
|
||||
if self.screenshot_format and self.screenshot_format.upper() == 'JPEG':
|
||||
from PIL import Image
|
||||
import io
|
||||
img = Image.open(io.BytesIO(screenshot_png))
|
||||
jpeg_buffer = io.BytesIO()
|
||||
img.save(jpeg_buffer, format='JPEG', quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
|
||||
self.screenshot = jpeg_buffer.getvalue()
|
||||
img.close()
|
||||
else:
|
||||
self.screenshot = screenshot_png
|
||||
except Exception as e:
|
||||
driver.quit()
|
||||
raise e
|
||||
|
||||
@@ -10,6 +10,9 @@ import os
|
||||
import pkgutil
|
||||
import re
|
||||
|
||||
SCREENSHOT_FORMAT_JPEG = 'JPEG'
|
||||
SCREENSHOT_FORMAT_PNG = 'PNG'
|
||||
|
||||
class difference_detection_processor():
|
||||
|
||||
browser_steps = None
|
||||
@@ -19,9 +22,9 @@ class difference_detection_processor():
|
||||
watch = None
|
||||
xpath_data = None
|
||||
preferred_proxy = None
|
||||
screenshot_format = SCREENSHOT_FORMAT_JPEG
|
||||
|
||||
def __init__(self, *args, datastore, watch_uuid, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
def __init__(self, datastore, watch_uuid):
|
||||
self.datastore = datastore
|
||||
self.watch_uuid = watch_uuid
|
||||
self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))
|
||||
@@ -97,7 +100,8 @@ class difference_detection_processor():
|
||||
# Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need.
|
||||
# When browser_connection_url is None, the fetcher should default to working out the best defaults itself (OS env vars etc.)
|
||||
self.fetcher = fetcher_obj(proxy_override=proxy_url,
|
||||
custom_browser_connection_url=custom_browser_connection_url
|
||||
custom_browser_connection_url=custom_browser_connection_url,
|
||||
screenshot_format=self.screenshot_format
|
||||
)
|
||||
|
||||
if self.watch.has_browser_steps:
|
||||
@@ -159,6 +163,7 @@ class difference_detection_processor():
|
||||
request_body=request_body,
|
||||
request_headers=request_headers,
|
||||
request_method=request_method,
|
||||
screenshot_format = self.screenshot_format,
|
||||
timeout=timeout,
|
||||
url=url,
|
||||
watch_uuid=self.watch_uuid,
|
||||
|
||||
@@ -10,7 +10,7 @@ import hashlib
|
||||
import os
|
||||
import time
|
||||
from loguru import logger
|
||||
from changedetectionio.processors import difference_detection_processor
|
||||
from changedetectionio.processors import difference_detection_processor, SCREENSHOT_FORMAT_PNG
|
||||
from changedetectionio.processors.exceptions import ProcessorException
|
||||
from . import DEFAULT_COMPARISON_METHOD, DEFAULT_COMPARISON_THRESHOLD_OPENCV, DEFAULT_COMPARISON_THRESHOLD_PIXELMATCH
|
||||
|
||||
@@ -21,6 +21,9 @@ description = 'Compares screenshots using fast algorithms (OpenCV or pixelmatch)
|
||||
class perform_site_check(difference_detection_processor):
|
||||
"""Fast screenshot comparison processor."""
|
||||
|
||||
# Override to use PNG format for better image comparison (JPEG compression creates noise)
|
||||
screenshot_format = SCREENSHOT_FORMAT_PNG
|
||||
|
||||
def run_changedetection(self, watch):
|
||||
"""
|
||||
Perform screenshot comparison using OpenCV or pixelmatch.
|
||||
|
||||
Reference in New Issue
Block a user