mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2025-12-17 05:26:30 +00:00
Some checks failed
Build and push containers / metadata (push) Has been cancelled
Build and push containers / build-push-containers (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/amd64 (alpine) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/arm64 (alpine) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/amd64 (main) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/arm/v7 (main) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/arm/v8 (main) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/arm64 (main) (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
132 lines
4.9 KiB
Python
132 lines
4.9 KiB
Python
"""
|
|
Base data extraction module for all processors.
|
|
|
|
This module handles extracting data from watch history using regex patterns
|
|
and exporting to CSV format. This is the default extractor that all processors
|
|
(text_json_diff, restock_diff, etc.) can use by default or override.
|
|
"""
|
|
|
|
import os
|
|
from loguru import logger
|
|
|
|
|
|
def render_form(watch, datastore, request, url_for, render_template, flash, redirect, extract_form=None):
|
|
"""
|
|
Render the data extraction form.
|
|
|
|
Args:
|
|
watch: The watch object
|
|
datastore: The ChangeDetectionStore instance
|
|
request: Flask request object
|
|
url_for: Flask url_for function
|
|
render_template: Flask render_template function
|
|
flash: Flask flash function
|
|
redirect: Flask redirect function
|
|
extract_form: Optional pre-built extract form (for error cases)
|
|
|
|
Returns:
|
|
Rendered HTML response with the extraction form
|
|
"""
|
|
from changedetectionio import forms
|
|
|
|
uuid = watch.get('uuid')
|
|
|
|
# Use provided form or create a new one
|
|
if extract_form is None:
|
|
extract_form = forms.extractDataForm(
|
|
formdata=request.form,
|
|
data={'extract_regex': request.form.get('extract_regex', '')}
|
|
)
|
|
|
|
# Get error information for the template
|
|
screenshot_url = watch.get_screenshot()
|
|
|
|
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
|
is_html_webdriver = False
|
|
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
|
is_html_webdriver = True
|
|
|
|
password_enabled_and_share_is_off = False
|
|
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
|
|
password_enabled_and_share_is_off = not datastore.data['settings']['application'].get('shared_diff_access')
|
|
|
|
# Use the shared default template from processors/templates/
|
|
# Processors can override this by creating their own extract.py with custom template logic
|
|
output = render_template(
|
|
"extract.html",
|
|
uuid=uuid,
|
|
extract_form=extract_form,
|
|
watch_a=watch,
|
|
last_error=watch['last_error'],
|
|
last_error_screenshot=watch.get_error_snapshot(),
|
|
last_error_text=watch.get_error_text(),
|
|
screenshot=screenshot_url,
|
|
is_html_webdriver=is_html_webdriver,
|
|
password_enabled_and_share_is_off=password_enabled_and_share_is_off,
|
|
extra_title=f" - {watch.label} - Extract Data",
|
|
extra_stylesheets=[url_for('static_content', group='styles', filename='diff.css')],
|
|
pure_menu_fixed=False
|
|
)
|
|
|
|
return output
|
|
|
|
|
|
def process_extraction(watch, datastore, request, url_for, make_response, send_from_directory, flash, redirect, extract_form=None):
|
|
"""
|
|
Process the data extraction request and return CSV file.
|
|
|
|
Args:
|
|
watch: The watch object
|
|
datastore: The ChangeDetectionStore instance
|
|
request: Flask request object
|
|
url_for: Flask url_for function
|
|
make_response: Flask make_response function
|
|
send_from_directory: Flask send_from_directory function
|
|
flash: Flask flash function
|
|
redirect: Flask redirect function
|
|
extract_form: Optional pre-built extract form
|
|
|
|
Returns:
|
|
CSV file download response or redirect to form on error
|
|
"""
|
|
from changedetectionio import forms
|
|
|
|
uuid = watch.get('uuid')
|
|
|
|
# Use provided form or create a new one
|
|
if extract_form is None:
|
|
extract_form = forms.extractDataForm(
|
|
formdata=request.form,
|
|
data={'extract_regex': request.form.get('extract_regex', '')}
|
|
)
|
|
|
|
if not extract_form.validate():
|
|
flash("An error occurred, please see below.", "error")
|
|
# render_template needs to be imported from Flask for this to work
|
|
from flask import render_template as flask_render_template
|
|
return render_form(
|
|
watch=watch,
|
|
datastore=datastore,
|
|
request=request,
|
|
url_for=url_for,
|
|
render_template=flask_render_template,
|
|
flash=flash,
|
|
redirect=redirect,
|
|
extract_form=extract_form
|
|
)
|
|
|
|
extract_regex = request.form.get('extract_regex', '').strip()
|
|
output = watch.extract_regex_from_all_history(extract_regex)
|
|
|
|
if output:
|
|
watch_dir = os.path.join(datastore.datastore_path, uuid)
|
|
response = make_response(send_from_directory(directory=watch_dir, path=output, as_attachment=True))
|
|
response.headers['Content-type'] = 'text/csv'
|
|
response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
|
|
response.headers['Pragma'] = 'no-cache'
|
|
response.headers['Expires'] = "0"
|
|
return response
|
|
|
|
flash('No matches found while scanning all of the watch history for that RegEx.', 'error')
|
|
return redirect(url_for('ui.ui_diff.diff_history_page_extract_GET', uuid=uuid))
|