mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-01-24 08:00:22 +00:00
Multi-language / Translations Support (#3696) - Complete internationalization system implemented - Support for 7 languages: Czech (cs), German (de), French (fr), Italian (it), Korean (ko), Chinese Simplified (zh), Chinese Traditional (zh_TW) - Language selector with localized flags and theming - Flash message translations - Multiple translation fixes and improvements across all languages - Language setting preserved across redirects Pluggable Content Fetchers (#3653) - New architecture for extensible content fetcher system - Allows custom fetcher implementations Image / Screenshot Comparison Processor (#3680) - New processor for visual change detection (disabled for this release) - Supporting CSS/JS infrastructure added UI Improvements Design & Layout - Auto-generated tag color schemes - Simplified login form styling - Removed hard-coded CSS, moved to SCSS variables - Tag UI cleanup and improvements - Automatic tab wrapper functionality - Menu refactoring for better organization - Cleanup of offset settings - Hide sticky tabs on narrow viewports - Improved responsive layout (#3702) User Experience - Modal alerts/confirmations on delete/clear operations (#3693, #3598, #3382) - Auto-add https:// to URLs in quickwatch form if not present - Better redirect handling on login (#3699) - 'Recheck all' now returns to correct group/tag (#3673) - Language set redirect keeps hash fragment - More friendly human-readable text throughout UI Performance & Reliability Scheduler & Processing - Soft delays instead of blocking time.sleep() calls (#3710) - More resilient handling of same UUID being processed (#3700) - Better Puppeteer timeout handling - Improved Puppeteer shutdown/cleanup (#3692) - Requests cleanup now properly async History & Rendering - Faster server-side "difference" rendering on History page (#3442) - Show ignored/triggered rows in history - API: Retry watch data if watch dict changed (more reliable) API Improvements - Watch get endpoint: retry mechanism for 
changed watch data - WatchHistoryDiff API endpoint includes extra format args (#3703) Testing Improvements - Replace time.sleep with wait_for_notification_endpoint_output (#3716) - Test for mode switching (#3701) - Test for #3720 added (#3725) - Extract-text difference test fixes - Improved dev workflow Bug Fixes - Notification error text output (#3672, #3669, #3280) - HTML validation fixes (#3704) - Template discovery path fixes - Notification debug log now uses system locale for dates/times - Puppeteer spelling mistake in log output - Recalculation on anchor change - Queue bubble update disabled temporarily Dependency Updates - beautifulsoup4 updated (#3724) - psutil 7.1.0 → 7.2.1 (#3723) - python-engineio ~=4.12.3 → ~=4.13.0 (#3707) - python-socketio ~=5.14.3 → ~=5.16.0 (#3706) - flask-socketio ~=5.5.1 → ~=5.6.0 (#3691) - brotli ~=1.1 → ~=1.2 (#3687) - lxml updated (#3590) - pytest ~=7.2 → ~=9.0 (#3676) - jsonschema ~=4.0 → ~=4.25 (#3618) - pluggy ~=1.5 → ~=1.6 (#3616) - cryptography 44.0.1 → 46.0.3 (security) (#3589) Documentation - README updated with viewport size setup information Development Infrastructure - Dev container only built on dev branch - Improved dev workflow tooling
133 lines
5.0 KiB
Python
133 lines
5.0 KiB
Python
"""
|
|
Base data extraction module for all processors.

This module handles extracting data from watch history using regex patterns
and exporting to CSV format. It is the default extractor: every processor
(text_json_diff, restock_diff, etc.) uses it unless it provides its own override.
|
|
"""
|
|
|
|
import os
|
|
from flask_babel import gettext
|
|
from loguru import logger
|
|
|
|
|
|
def render_form(watch, datastore, request, url_for, render_template, flash, redirect, extract_form=None):
    """
    Render the data extraction form.

    Args:
        watch: The watch object
        datastore: The ChangeDetectionStore instance
        request: Flask request object
        url_for: Flask url_for function
        render_template: Flask render_template function
        flash: Flask flash function (accepted but not used by this function)
        redirect: Flask redirect function (accepted but not used by this function)
        extract_form: Optional pre-built extract form (for error cases)

    Returns:
        Rendered HTML response with the extraction form
    """
    uuid = watch.get('uuid')

    # Use provided form or create a new one
    if extract_form is None:
        # Imported lazily, and only when a form actually has to be built
        from changedetectionio import forms

        extract_form = forms.extractDataForm(
            formdata=request.form,
            data={'extract_regex': request.form.get('extract_regex', '')}
        )

    # Screenshot handed to the template
    screenshot_url = watch.get_screenshot()

    app_settings = datastore.data['settings']['application']
    system_uses_webdriver = app_settings['fetch_backend'] == 'html_webdriver'

    # A key that is present but set to None bypasses .get()'s default, so
    # normalise to '' before calling str.startswith() on it.
    fetch_backend = watch.get('fetch_backend') or ''
    is_html_webdriver = (
        (fetch_backend == 'system' and system_uses_webdriver)
        or fetch_backend == 'html_webdriver'
        or fetch_backend.startswith('extra_browser_')
    )

    # Only meaningful when a password is configured (in settings or via SALTED_PASS)
    password_enabled_and_share_is_off = False
    if app_settings.get('password') or os.getenv("SALTED_PASS", False):
        password_enabled_and_share_is_off = not app_settings.get('shared_diff_access')

    # Use the shared default template from processors/templates/
    # Processors can override this by creating their own extract.py with custom template logic
    output = render_template(
        "extract.html",
        uuid=uuid,
        extract_form=extract_form,
        watch_a=watch,
        last_error=watch['last_error'],
        last_error_screenshot=watch.get_error_snapshot(),
        last_error_text=watch.get_error_text(),
        screenshot=screenshot_url,
        is_html_webdriver=is_html_webdriver,
        password_enabled_and_share_is_off=password_enabled_and_share_is_off,
        extra_title=f" - {watch.label} - Extract Data",
        extra_stylesheets=[url_for('static_content', group='styles', filename='diff.css')],
        pure_menu_fixed=False
    )

    return output
|
|
|
|
|
|
def process_extraction(watch, datastore, request, url_for, make_response, send_from_directory, flash, redirect, extract_form=None):
    """
    Run the submitted regex over the watch history and answer with a CSV download.

    Args:
        watch: The watch object
        datastore: The ChangeDetectionStore instance
        request: Flask request object
        url_for: Flask url_for function
        make_response: Flask make_response function
        send_from_directory: Flask send_from_directory function
        flash: Flask flash function
        redirect: Flask redirect function
        extract_form: Optional pre-built extract form

    Returns:
        The CSV attachment response when the regex matched something,
        the re-rendered form when validation fails, or a redirect back to
        the extract page when nothing matched.
    """
    from changedetectionio import forms

    watch_uuid = watch.get('uuid')

    # Build the form from the request unless the caller supplied one
    if extract_form is None:
        extract_form = forms.extractDataForm(
            formdata=request.form,
            data={'extract_regex': request.form.get('extract_regex', '')},
        )

    # Invalid input: show the form again with an error banner
    if not extract_form.validate():
        flash(gettext("An error occurred, please see below."), "error")
        # No render_template is passed into this function, so use Flask's own
        from flask import render_template as flask_render_template
        return render_form(
            watch=watch,
            datastore=datastore,
            request=request,
            url_for=url_for,
            render_template=flask_render_template,
            flash=flash,
            redirect=redirect,
            extract_form=extract_form,
        )

    pattern = request.form.get('extract_regex', '').strip()
    csv_name = watch.extract_regex_from_all_history(pattern)

    # Nothing produced: tell the user and go back to the extract page
    if not csv_name:
        flash(gettext('No matches found while scanning all of the watch history for that RegEx.'), 'error')
        return redirect(url_for('ui.ui_diff.diff_history_page_extract_GET', uuid=watch_uuid))

    watch_dir = os.path.join(datastore.datastore_path, watch_uuid)
    response = make_response(
        send_from_directory(directory=watch_dir, path=csv_name, as_attachment=True)
    )

    # Mark the payload as CSV and switch off caching
    for header_name, header_value in (
        ('Content-type', 'text/csv'),
        ('Cache-Control', 'no-cache, no-store, must-revalidate'),
        ('Pragma', 'no-cache'),
        ('Expires', "0"),
    ):
        response.headers[header_name] = header_value

    return response
|