Mirror of https://github.com/dgtlmoon/changedetection.io.git, synced 2025-11-07 18:17:19 +00:00

Compare commits: 8 commits, janus-queu...openai-int
| SHA1 |
|---|
| df9258a8f7 |
| c070265668 |
| 48921c878d |
| 44384386cc |
| 3513676bc6 |
| 559b729475 |
| 8937df7b0b |
| 9a015041a5 |
.github/test/Dockerfile-alpine (vendored, 5 changed lines)
@@ -2,7 +2,7 @@
# Test that we can still build on Alpine (musl modified libc https://musl.libc.org/)
# Some packages wont install via pypi because they dont have a wheel available under this architecture.

FROM ghcr.io/linuxserver/baseimage-alpine:3.22
FROM ghcr.io/linuxserver/baseimage-alpine:3.21
ENV PYTHONUNBUFFERED=1

COPY requirements.txt /requirements.txt
@@ -24,13 +24,12 @@ RUN \
  apk add --update --no-cache \
    libjpeg \
    libxslt \
    file \
    nodejs \
    poppler-utils \
    python3 && \
  echo "**** pip3 install test of changedetection.io ****" && \
  python3 -m venv /lsiopy && \
  pip install -U pip wheel setuptools && \
  pip install -U --no-cache-dir --find-links https://wheel-index.linuxserver.io/alpine-3.22/ -r /requirements.txt && \
  pip install -U --no-cache-dir --find-links https://wheel-index.linuxserver.io/alpine-3.21/ -r /requirements.txt && \
  apk del --purge \
    build-dependencies
.github/workflows/pypi-release.yml (vendored, 4 changed lines)
@@ -34,7 +34,7 @@ jobs:
    - build
    steps:
      - name: Download all the dists
        uses: actions/download-artifact@v5
        uses: actions/download-artifact@v4
        with:
          name: python-package-distributions
          path: dist/
@@ -72,7 +72,7 @@ jobs:

    steps:
      - name: Download all the dists
        uses: actions/download-artifact@v5
        uses: actions/download-artifact@v4
        with:
          name: python-package-distributions
          path: dist/
@@ -84,9 +84,6 @@ COPY changedetection.py /app/changedetection.py
ARG LOGGER_LEVEL=''
ENV LOGGER_LEVEL="$LOGGER_LEVEL"

# Default
ENV LC_ALL=en_US.UTF-8

WORKDIR /app
CMD ["python", "./changedetection.py", "-d", "/datastore"]
@@ -2,7 +2,7 @@

# Read more https://github.com/dgtlmoon/changedetection.io/wiki

__version__ = '0.50.8'
__version__ = '0.50.7'

from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError
@@ -35,22 +35,13 @@ def sigshutdown_handler(_signo, _stack_frame):
    app.config.exit.set()
    datastore.stop_thread = True

    # Shutdown workers and queues immediately
    # Shutdown workers immediately
    try:
        from changedetectionio import worker_handler
        worker_handler.shutdown_workers()
    except Exception as e:
        logger.error(f"Error shutting down workers: {str(e)}")

    # Close janus queues properly
    try:
        from changedetectionio.flask_app import update_q, notification_q
        update_q.close()
        notification_q.close()
        logger.debug("Janus queues closed successfully")
    except Exception as e:
        logger.critical(f"CRITICAL: Failed to close janus queues: {e}")

    # Shutdown socketio server fast
    from changedetectionio.flask_app import socketio_server
    if socketio_server and hasattr(socketio_server, 'shutdown'):
@@ -7,7 +7,6 @@ from changedetectionio.flask_app import watch_check_update
import asyncio
import importlib
import os
import queue
import time

from loguru import logger
@@ -38,23 +37,13 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
        watch = None

        try:
            # Use native janus async interface - no threads needed!
            queued_item_data = await asyncio.wait_for(q.async_get(), timeout=1.0)

            # Use asyncio wait_for to make queue.get() cancellable
            queued_item_data = await asyncio.wait_for(q.get(), timeout=1.0)
        except asyncio.TimeoutError:
            # No jobs available, continue loop
            continue
        except Exception as e:
            logger.critical(f"CRITICAL: Worker {worker_id} failed to get queue item: {type(e).__name__}: {e}")

            # Log queue health for debugging
            try:
                queue_size = q.qsize()
                is_empty = q.empty()
                logger.critical(f"CRITICAL: Worker {worker_id} queue health - size: {queue_size}, empty: {is_empty}")
            except Exception as health_e:
                logger.critical(f"CRITICAL: Worker {worker_id} queue health check failed: {health_e}")

            logger.error(f"Worker {worker_id} error getting queue item: {e}")
            await asyncio.sleep(0.1)
            continue
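For orientation, a minimal runnable sketch of the cancellable-get pattern used in this worker loop, assuming only that the queue object exposes an awaitable async_get() the way the janus-backed RecheckPriorityQueue does (worker_id and the print are illustrative):

```python
import asyncio

async def poll_queue(q, worker_id: int):
    """Drain a janus-style queue from an async worker without blocking shutdown."""
    while True:
        try:
            # Bound the wait so the loop regularly regains control and stays cancellable
            item = await asyncio.wait_for(q.async_get(), timeout=1.0)
        except asyncio.TimeoutError:
            continue  # nothing queued yet, poll again
        print(f"worker {worker_id} picked up {item}")
```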
@@ -1,7 +1,7 @@
{% extends 'base.html' %}

{% block content %}
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form %}
{% from '_helpers.html' import render_field, render_checkbox_field, render_simple_field, render_button, render_time_schedule_form %}
{% from '_common_fields.html' import render_common_settings_form %}
<script>
    const notification_base_url="{{url_for('ui.ui_notification.ajax_callback_send_notification_test', mode="global-settings")}}";
@@ -23,6 +23,7 @@
            <li class="tab"><a href="#fetching">Fetching</a></li>
            <li class="tab"><a href="#filters">Global Filters</a></li>
            <li class="tab"><a href="#ui-options">UI Options</a></li>
            <li class="tab"><a href="#ai-options"><i data-feather="aperture" style="width: 14px; height: 14px; margin-right: 4px;"></i> AI</a></li>
            <li class="tab"><a href="#api">API</a></li>
            <li class="tab"><a href="#timedate">Time & Date</a></li>
            <li class="tab"><a href="#proxies">CAPTCHA & Proxies</a></li>
@@ -262,6 +263,24 @@ nav
            </div>

        </div>
        <div class="tab-pane-inner" id="ai-options">
            <p><strong>New:</strong> click here (link to changedetection.io tutorial page) find out how to setup and example</p>
            <br>
            key fields should be some password type field so you can see its set but doesnt contain the key on view and doesnt lose it on save<br>

            <div class="pure-control-group inline-radio">
                {{ render_simple_field(form.application.form.ai.form.LLM_backend) }}
                <span class="pure-form-message-inline">Preferred LLM connection</span>
            </div>
            <div class="pure-control-group">
                {{ render_checkbox_field(form.application.form.ai.form.API_keys.form.openai) }}
                <span class="pure-form-message-inline">Go here to read more about OpenAI integration</span>
            </div>
            <div class="pure-control-group">
                {{ render_checkbox_field(form.application.form.ai.form.API_keys.form.gemini) }}
                <span class="pure-form-message-inline">Go here to read more about Google Gemini integration</span>
            </div>
        </div>
        <div class="tab-pane-inner" id="proxies">
            <div id="recommended-proxy">
                <div>
@@ -25,7 +25,7 @@
    <div class="tabs collapsable">
        <ul>
            <li class="tab" id=""><a href="#general">General</a></li>
            <li class="tab"><a href="#filters-and-triggers">Filters & Triggers</a></li>
            <li class="tab"><a href="#filters-and-triggers">AI, Filters & Triggers</a></li>
            {% if extra_tab_content %}
            <li class="tab"><a href="#extras_tab">{{ extra_tab_content }}</a></li>
            {% endif %}
@@ -312,8 +312,27 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
        '''For when viewing the "preview" of the rendered text from inside of Edit'''
        from flask import jsonify
        from changedetectionio.processors.text_json_diff import prepare_filter_prevew
        result = prepare_filter_prevew(watch_uuid=uuid, form_data=request.form, datastore=datastore)
        return jsonify(result)

        watch = datastore.data["watching"].get(uuid)

        if not watch:
            return jsonify({
                "error": "Watch not found",
                "code": 400
            }), 400

        if not watch.history_n:
            return jsonify({
                "error": "Watch has empty history, at least one fetch of the page is required.",
                "code": 400
            }), 400
        #
        try:
            result = prepare_filter_prevew(watch_uuid=uuid, form_data=request.form, datastore=datastore)
            return jsonify(result)
        except Exception as e:
            return abort(500, str(e))


    @edit_blueprint.route("/highlight_submit_ignore_url", methods=['POST'])
    @login_optionally_required
@@ -93,15 +93,12 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
            return redirect(url_for('watchlist.index'))

        # For submission of requesting an extract
        extract_form = forms.extractDataForm(formdata=request.form,
                                             data={'extract_regex': request.form.get('extract_regex', '')}
                                             )
        extract_form = forms.extractDataForm(request.form)
        if not extract_form.validate():
            flash("An error occurred, please see below.", "error")
            return _render_diff_template(uuid, extract_form)

        else:
            extract_regex = request.form.get('extract_regex', '').strip()
            extract_regex = request.form.get('extract_regex').strip()
            output = watch.extract_regex_from_all_history(extract_regex)
            if output:
                watch_dir = os.path.join(datastore.datastore_path, uuid)
@@ -112,11 +109,12 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
                response.headers['Expires'] = "0"
                return response

            flash('No matches found while scanning all of the watch history for that RegEx.', 'error')
            return redirect(url_for('ui.ui_views.diff_history_page', uuid=uuid) + '#extract')
            flash('Nothing matches that RegEx', 'error')
            redirect(url_for('ui_views.diff_history_page', uuid=uuid) + '#extract')

    def _render_diff_template(uuid, extract_form=None):
        """Helper function to render the diff template with all required data"""
    @views_blueprint.route("/diff/<string:uuid>", methods=['GET'])
    @login_optionally_required
    def diff_history_page(uuid):
        from changedetectionio import forms

        # More for testing, possible to return the first/only
@@ -130,11 +128,8 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
            flash("No history found for the specified link, bad link?", "error")
            return redirect(url_for('watchlist.index'))

        # Use provided form or create a new one
        if extract_form is None:
            extract_form = forms.extractDataForm(formdata=request.form,
                                                 data={'extract_regex': request.form.get('extract_regex', '')}
                                                 )
        # For submission of requesting an extract
        extract_form = forms.extractDataForm(request.form)

        history = watch.history
        dates = list(history.keys())
@@ -175,7 +170,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe

        datastore.set_last_viewed(uuid, time.time())

        return render_template("diff.html",
        output = render_template("diff.html",
                                current_diff_url=watch['url'],
                                from_version=str(from_version),
                                to_version=str(to_version),
@@ -198,10 +193,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
                                watch_a=watch
                                )

    @views_blueprint.route("/diff/<string:uuid>", methods=['GET'])
    @login_optionally_required
    def diff_history_page(uuid):
        return _render_diff_template(uuid)
        return output

    @views_blueprint.route("/form/add/quickwatch", methods=['POST'])
    @login_optionally_required
@@ -220,7 +212,14 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe

        add_paused = request.form.get('edit_and_watch_submit_button') != None
        processor = request.form.get('processor', 'text_json_diff')
        new_uuid = datastore.add_watch(url=url, tag=request.form.get('tags').strip(), extras={'paused': add_paused, 'processor': processor})
        extras = {'paused': add_paused, 'processor': processor}

        LLM_prompt = request.form.get('LLM_prompt', '').strip()
        if LLM_prompt:
            extras['LLM_prompt'] = LLM_prompt
            extras['LLM_send_type'] = request.form.get('LLM_send_type', 'text')

        new_uuid = datastore.add_watch(url=url, tag=request.form.get('tags').strip(), extras=extras)

        if new_uuid:
            if add_paused:
@@ -5,12 +5,7 @@
<script src="{{url_for('static_content', group='js', filename='watch-overview.js')}}" defer></script>
<script>let nowtimeserver={{ now_time_server }};</script>
<script>let favicon_baseURL="{{ url_for('static_content', group='favicon', filename="PLACEHOLDER")}}";</script>
<script>
// Initialize Feather icons after the page loads
document.addEventListener('DOMContentLoaded', function() {
    feather.replace();
});
</script>

<style>
.checking-now .last-checked {
    background-image: linear-gradient(to bottom, transparent 0%, rgba(0,0,0,0.05) 40%, rgba(0,0,0,0.1) 100%);
@@ -31,8 +26,12 @@ document.addEventListener('DOMContentLoaded', function() {
            {{ render_nolabel_field(form.edit_and_watch_submit_button, title="Edit first then Watch") }}
        </div>
        <div id="watch-group-tag">
            <i data-feather="tag" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>
            {{ render_field(form.tags, value=active_tag.title if active_tag_uuid else '', placeholder="Watch group / tag", class="transparent-field") }}
        </div>

        {%- include 'edit/llm_prompt.html' -%}

        <div id="quick-watch-processor-type">
            {{ render_simple_field(form.processor) }}
        </div>
@@ -12,7 +12,7 @@ from blinker import signal

from changedetectionio.strtobool import strtobool
from threading import Event
from changedetectionio.queue_handlers import RecheckPriorityQueue, NotificationQueue
from changedetectionio.custom_queue import SignalPriorityQueue, AsyncSignalPriorityQueue, NotificationQueue
from changedetectionio import worker_handler

from flask import (
@@ -48,8 +48,8 @@ datastore = None
ticker_thread = None
extra_stylesheets = []

# Use bulletproof janus-based queues for sync/async reliability
update_q = RecheckPriorityQueue()
# Use async queue by default, keep sync for backward compatibility
update_q = AsyncSignalPriorityQueue() if worker_handler.USE_ASYNC_WORKERS else SignalPriorityQueue()
notification_q = NotificationQueue()
MAX_QUEUE_SIZE = 2000

@@ -844,22 +844,16 @@ def ticker_thread_check_time_launch_checks():

                # Use Epoch time as priority, so we get a "sorted" PriorityQueue, but we can still push a priority 1 into it.
                priority = int(time.time())
                logger.debug(
                    f"> Queued watch UUID {uuid} "
                    f"last checked at {watch['last_checked']} "
                    f"queued at {now:0.2f} priority {priority} "
                    f"jitter {watch.jitter_seconds:0.2f}s, "
                    f"{now - watch['last_checked']:0.2f}s since last checked")

                # Into the queue with you
                queued_successfully = worker_handler.queue_item_async_safe(update_q,
                                                                           queuedWatchMetaData.PrioritizedItem(priority=priority,
                                                                                                               item={'uuid': uuid})
                                                                           )
                if queued_successfully:
                    logger.debug(
                        f"> Queued watch UUID {uuid} "
                        f"last checked at {watch['last_checked']} "
                        f"queued at {now:0.2f} priority {priority} "
                        f"jitter {watch.jitter_seconds:0.2f}s, "
                        f"{now - watch['last_checked']:0.2f}s since last checked")
                else:
                    logger.critical(f"CRITICAL: Failed to queue watch UUID {uuid} in ticker thread!")

                worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid}))

                # Reset for next time
                watch.jitter_seconds = 0
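A small sketch of the epoch-as-priority idea described in the ticker comment above: scheduled rechecks use int(time.time()) so older work sorts first, while an explicit priority of 1 jumps the whole queue. The PrioritizedItem here is a stand-in for queuedWatchMetaData.PrioritizedItem and may not match its exact definition:

```python
import heapq
import time
from dataclasses import dataclass, field

@dataclass(order=True)
class PrioritizedItem:
    priority: int
    item: dict = field(compare=False)  # payload never takes part in ordering

heap = []
heapq.heappush(heap, PrioritizedItem(priority=int(time.time()), item={'uuid': 'scheduled-watch'}))
heapq.heappush(heap, PrioritizedItem(priority=1, item={'uuid': 'recheck-now-watch'}))
assert heapq.heappop(heap).item['uuid'] == 'recheck-now-watch'
```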
@@ -55,6 +55,18 @@ valid_method = {
default_method = 'GET'
allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False'))

LLM_example_texts = ['Tell me simply "Price, In stock"',
                     'Give me a list of all products for sale in this text',
                     'Tell me simply "Yes" "No" or "Maybe" if you think the weather outlook is good for a 4-day small camping trip',
                     'Look at this restaurant menu and only give me list of meals you think are good for type 2 diabetics, if nothing is found just say "nothing"',
                     ]

LLM_send_type_choices = [('text', 'Plain text after filters'),
                         ('above_fold_text', 'Text above the fold'),
                         ('Screenshot', 'Screenshot / Selection'),
                         ('HTML', 'HTML Source')
                         ]

class StringListField(StringField):
    widget = widgets.TextArea()

@@ -396,19 +408,6 @@ def validate_url(test_url):
        # This should be wtforms.validators.
        raise ValidationError('Watch protocol is not permitted by SAFE_PROTOCOL_REGEX or incorrect URL format')


class ValidateSinglePythonRegexString(object):
    def __init__(self, message=None):
        self.message = message

    def __call__(self, form, field):
        try:
            re.compile(field.data)
        except re.error:
            message = field.gettext('RegEx \'%s\' is not a valid regular expression.')
            raise ValidationError(message % (field.data))


class ValidateListRegex(object):
    """
    Validates that anything that looks like a regex passes as a regex
@@ -427,7 +426,6 @@ class ValidateListRegex(object):
                message = field.gettext('RegEx \'%s\' is not a valid regular expression.')
                raise ValidationError(message % (line))


class ValidateCSSJSONXPATHInput(object):
    """
    Filter validation
@@ -529,11 +527,15 @@ class ValidateCSSJSONXPATHInput(object):

class quickWatchForm(Form):
    from . import processors
    import random

    url = fields.URLField('URL', validators=[validateURL()])
    tags = StringTagUUID('Group tag', [validators.Optional()])
    watch_submit_button = SubmitField('Watch', render_kw={"class": "pure-button pure-button-primary"})
    processor = RadioField(u'Processor', choices=processors.available_processors(), default="text_json_diff")
    LLM_prompt = TextAreaField(u'AI Prompt', [validators.Optional()], render_kw={"placeholder": f'Example, "{random.choice(LLM_example_texts)}"'})
    LLM_send_type = RadioField(u'LLM Send', choices=LLM_send_type_choices, default="text")

    edit_and_watch_submit_button = SubmitField('Edit > Watch', render_kw={"class": "pure-button pure-button-primary"})


@@ -541,6 +543,7 @@ class quickWatchForm(Form):
# Common to a single watch and the global settings
class commonSettingsForm(Form):
    from . import processors
    import random

    def __init__(self, formdata=None, obj=None, prefix="", data=None, meta=None, **kwargs):
        super().__init__(formdata, obj, prefix, data, meta, **kwargs)
@@ -558,6 +561,8 @@ class commonSettingsForm(Form):
    timezone = StringField("Timezone for watch schedule", render_kw={"list": "timezones"}, validators=[validateTimeZoneName()])
    webdriver_delay = IntegerField('Wait seconds before extracting text', validators=[validators.Optional(), validators.NumberRange(min=1, message="Should contain one or more seconds")])

    LLM_prompt = TextAreaField(u'AI Prompt', [validators.Optional()], render_kw={"placeholder": f'Example, "{random.choice(LLM_example_texts)}"'})
    LLM_send_type = RadioField(u'LLM Send', choices=LLM_send_type_choices, default="text")

class importForm(Form):
    from . import processors
@@ -756,6 +761,29 @@ class globalSettingsApplicationUIForm(Form):
    socket_io_enabled = BooleanField('Realtime UI Updates Enabled', default=True, validators=[validators.Optional()])
    favicons_enabled = BooleanField('Favicons Enabled', default=True, validators=[validators.Optional()])

class globalSettingsApplicationAIKeysForm(Form):

    openai = StringField('OpenAI Key',
                         validators=[validators.Optional()],
                         render_kw={"placeholder": 'xxxxxxxxx'}
                         )
    gemini = StringField('Google Gemini Key',
                         validators=[validators.Optional()],
                         render_kw={"placeholder": 'ooooooooo'}
                         )

class globalSettingsApplicationAIForm(Form):

    #@todo use only configured types?
    LLM_backend = RadioField(u'LLM Backend',
                             choices=[('openai', 'Open AI'), ('gemini', 'Gemini')],
                             default="text")

    # So that we can pass this to our LLM/__init__.py as a keys dict
    API_keys = FormField(globalSettingsApplicationAIKeysForm)


# datastore.data['settings']['application']..
class globalSettingsApplicationForm(commonSettingsForm):

@@ -788,6 +816,8 @@ class globalSettingsApplicationForm(commonSettingsForm):
                                                     message="Should contain zero or more attempts")])
    ui = FormField(globalSettingsApplicationUIForm)

    ai = FormField(globalSettingsApplicationAIForm)


class globalSettingsForm(Form):
    # Define these as FormFields/"sub forms", this way it matches the JSON storage
@@ -805,5 +835,5 @@ class globalSettingsForm(Form):


class extractDataForm(Form):
    extract_regex = StringField('RegEx to extract', validators=[validators.DataRequired(), ValidateSinglePythonRegexString()])
    extract_regex = StringField('RegEx to extract', validators=[validators.Length(min=1, message="Needs a RegEx")])
    extract_submit_button = SubmitField('Extract as CSV', render_kw={"class": "pure-button pure-button-primary"})
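For orientation, a minimal sketch of how the nested FormField layout above produces nested data, which is why templates can reach form.application.form.ai.form.API_keys.form.openai; the field names mirror the diff, but the surrounding classes here are simplified stand-ins and the persistence into the datastore is assumed:

```python
from wtforms import Form, FormField, StringField

class AIKeysForm(Form):
    openai = StringField('OpenAI Key')
    gemini = StringField('Google Gemini Key')

class AIForm(Form):
    API_keys = FormField(AIKeysForm)

class ApplicationForm(Form):
    ai = FormField(AIForm)

# Nested FormFields serialise to nested dicts, mirroring the settings JSON layout
form = ApplicationForm(data={'ai': {'API_keys': {'openai': 'sk-xxx'}}})
print(form.ai.form.API_keys.form.openai.data)  # -> 'sk-xxx'
```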
@@ -65,6 +65,10 @@ class model(dict):
                    'socket_io_enabled': True,
                    'favicons_enabled': True
                },
                'ai': {
                    'openai_key': None,
                    'gemini_key': None
                }
            }
        }
    }
@@ -639,7 +639,7 @@ class model(watch_base):
            if res:
                if not csv_writer:
                    # A file on the disk can be transferred much faster via flask than a string reply
                    csv_output_filename = f"report-{self.get('uuid')}.csv"
                    csv_output_filename = 'report.csv'
                    f = open(os.path.join(self.watch_data_dir, csv_output_filename), 'w')
                    # @todo some headers in the future
                    #fieldnames = ['Epoch seconds', 'Date']
@@ -3,7 +3,6 @@ import uuid

from changedetectionio import strtobool
default_notification_format_for_watch = 'System default'
CONDITIONS_MATCH_LOGIC_DEFAULT = 'ALL'

class watch_base(dict):

@@ -16,8 +15,6 @@ class watch_base(dict):
            'body': None,
            'browser_steps': [],
            'browser_steps_last_error_step': None,
            'conditions' : {},
            'conditions_match_logic': CONDITIONS_MATCH_LOGIC_DEFAULT,
            'check_count': 0,
            'check_unique_lines': False, # On change-detected, compare against all history if its something new
            'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
@@ -41,6 +38,9 @@ class watch_base(dict):
            'last_error': False,
            'last_notification_error': None,
            'last_viewed': 0, # history key value of the last viewed via the [diff] link
            'LLM_prompt': None,
            'LLM_send_type': None,
            'LLM_backend': None,
            'method': 'GET',
            'notification_alert_count': 0,
            'notification_body': None,
changedetectionio/processors/LLM/__init__.py (new file, 64 lines)
@@ -0,0 +1,64 @@
import importlib
from langchain_core.messages import SystemMessage, HumanMessage

SYSTEM_MESSAGE = (
    "You are a text analyser who will attempt to give the most concise information "
    "to the request, the information should be returned in a way that if I ask you again "
    "I should get the same answer if the outcome is the same. The goal is to cut down "
    "or reduce the text changes from you when i ask the same question about similar content "
    "Always list items in exactly the same order and wording as found in the source text. "
)


class LLM_integrate:
    PROVIDER_MAP = {
        "openai": ("langchain_openai", "ChatOpenAI"),
        "azure": ("langchain_community.chat_models", "AzureChatOpenAI"),
        "gemini": ("langchain_google_genai", "ChatGoogleGenerativeAI")
    }

    def __init__(self, api_keys: dict):
        """
        api_keys = {
            "openai": "sk-xxx",
            "azure": "AZURE_KEY",
            "gemini": "GEMINI_KEY"
        }
        """
        self.api_keys = api_keys

    def run(self, provider: str, model: str, message: str):
        module_name, class_name = self.PROVIDER_MAP[provider]

        # Import the class dynamically
        module = importlib.import_module(module_name)
        LLMClass = getattr(module, class_name)

        # Create the LLM object
        llm_kwargs = {}
        if provider == "openai":
            llm_kwargs = dict(api_key=self.api_keys.get("openai", ''),
                              model=model,
                              # https://api.python.langchain.com/en/latest/chat_models/langchain_openai.chat_models.base.ChatOpenAI.html#langchain_openai.chat_models.base.ChatOpenAI.temperature
                              temperature=0  # most deterministic,
                              )
        elif provider == "azure":
            llm_kwargs = dict(
                api_key=self.api_keys["azure"],
                azure_endpoint="https://<your-endpoint>.openai.azure.com",
                deployment_name=model
            )
        elif provider == "gemini":
            llm_kwargs = dict(api_key=self.api_keys.get("gemini"), model=model)

        llm = LLMClass(**llm_kwargs)

        # Build your messages
        messages = [
            SystemMessage(content=SYSTEM_MESSAGE),
            HumanMessage(content=message)
        ]

        # Run the model asynchronously
        result = llm.invoke(messages)
        return result.content
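A minimal usage sketch of the class above, assuming langchain-openai is installed and a real key is supplied (the key, prompt, and model name are illustrative):

```python
from changedetectionio.processors.LLM import LLM_integrate

integrator = LLM_integrate(api_keys={"openai": "sk-xxx"})  # hypothetical key
reply = integrator.run(
    provider="openai",
    model="gpt-4.1",
    message='Tell me simply "Price, In stock"\n\nWidget 3000 - $10.99 - In stock',
)
print(reply)  # the model's (ideally stable) one-line answer
```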
@@ -1,5 +1,6 @@
from abc import abstractmethod
from changedetectionio.content_fetchers.base import Fetcher
from changedetectionio.processors.LLM import LLM_integrate
from changedetectionio.strtobool import strtobool
from copy import deepcopy
from loguru import logger
@@ -7,7 +7,7 @@ import re
import urllib3

from changedetectionio.conditions import execute_ruleset_against_all_plugins
from changedetectionio.processors import difference_detection_processor
from changedetectionio.processors import difference_detection_processor, LLM_integrate
from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text, TRANSLATE_WHITESPACE_TABLE
from changedetectionio import html_tools, content_fetchers
from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
@@ -293,6 +293,30 @@ class perform_site_check(difference_detection_processor):
            # we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here.
            stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n")
            stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower()))
        ### OPENAI?


        # And here we run LLM integration based on the content we received
        LLM_keys = self.datastore.data['settings']['application']['ai'].get('API_keys', {})
        if watch.get('LLM_prompt') and stripped_text_from_html and LLM_keys:
            response = ""
            try:
                integrator = LLM_integrate(api_keys=LLM_keys)
                response = integrator.run(
                    provider="openai",
                    model="gpt-4.1",  # gpt-4-turbo
                    message=f"{watch.get('LLM_prompt')}\n----------- Content follows-----------\n\n{stripped_text_from_html}"
                )
            except Exception as e:
                logger.critical(f"Error running LLM integration {str(e)} (type etc)")
                raise(e)
                x = 1
                # todo is there something special when tokens are used up etc?
            else:
                stripped_text_from_html = response
                # logger.trace("LLM done")
            finally:
                logger.debug("LLM request done (type etc)")

        ### CALCULATE MD5
        # If there's text to ignore
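The practical effect of the block above is that, when a watch has an LLM prompt and a key is configured, the text handed to the checksum/change comparison is the model's reply rather than the raw filtered page text. A small sketch of that idea (function and variable names are illustrative):

```python
import hashlib
from typing import Optional

def text_for_change_detection(filtered_text: str, llm_reply: Optional[str]) -> str:
    # Compare the (ideally stable) LLM answer between checks when one exists,
    # otherwise fall back to the plain filtered page text.
    return llm_reply if llm_reply else filtered_text

def checksum(text: str) -> str:
    return hashlib.md5(text.encode('utf-8')).hexdigest()
```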
@@ -1,430 +0,0 @@
import heapq
import threading
from typing import Dict, List, Any, Optional
from blinker import signal
from loguru import logger

try:
    import janus
except ImportError:
    logger.critical("CRITICAL: janus library is required. Install with: pip install janus")
    raise


class RecheckPriorityQueue:
    """
    Ultra-reliable priority queue using janus for async/sync bridging.

    CRITICAL DESIGN NOTE: Both sync_q and async_q are required because:
    - sync_q: Used by Flask routes, ticker threads, and other synchronous code
    - async_q: Used by async workers (the actual fetchers/processors) and coroutines

    DO NOT REMOVE EITHER INTERFACE - they bridge different execution contexts:
    - Synchronous code (Flask, threads) cannot use async methods without blocking
    - Async code cannot use sync methods without blocking the event loop
    - janus provides the only safe bridge between these two worlds

    Attempting to unify to async-only would require:
    - Converting all Flask routes to async (major breaking change)
    - Using asyncio.run() in sync contexts (causes deadlocks)
    - Thread-pool wrapping (adds complexity and overhead)

    Minimal implementation focused on reliability:
    - Pure janus for sync/async bridge
    - Thread-safe priority ordering
    - Bulletproof error handling with critical logging
    """

    def __init__(self, maxsize: int = 0):
        try:
            self._janus_queue = janus.Queue(maxsize=maxsize)
            # BOTH interfaces required - see class docstring for why
            self.sync_q = self._janus_queue.sync_q   # Flask routes, ticker thread
            self.async_q = self._janus_queue.async_q  # Async workers

            # Priority storage - thread-safe
            self._priority_items = []
            self._lock = threading.RLock()

            # Signals for UI updates
            self.queue_length_signal = signal('queue_length')

            logger.debug("RecheckPriorityQueue initialized successfully")
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to initialize RecheckPriorityQueue: {e}")
            raise

    # SYNC INTERFACE (for ticker thread)
    def put(self, item, block: bool = True, timeout: Optional[float] = None):
        """Thread-safe sync put with priority ordering"""
        try:
            # Add to priority storage
            with self._lock:
                heapq.heappush(self._priority_items, item)

            # Notify via janus sync queue
            self.sync_q.put(True, block=block, timeout=timeout)

            # Emit signals
            self._emit_put_signals(item)

            logger.debug(f"Successfully queued item: {self._get_item_uuid(item)}")
            return True

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to put item {self._get_item_uuid(item)}: {e}")
            # Remove from priority storage if janus put failed
            try:
                with self._lock:
                    if item in self._priority_items:
                        self._priority_items.remove(item)
                        heapq.heapify(self._priority_items)
            except Exception as cleanup_e:
                logger.critical(f"CRITICAL: Failed to cleanup after put failure: {cleanup_e}")
            return False

    def get(self, block: bool = True, timeout: Optional[float] = None):
        """Thread-safe sync get with priority ordering"""
        try:
            # Wait for notification
            self.sync_q.get(block=block, timeout=timeout)

            # Get highest priority item
            with self._lock:
                if not self._priority_items:
                    logger.critical("CRITICAL: Queue notification received but no priority items available")
                    raise Exception("Priority queue inconsistency")
                item = heapq.heappop(self._priority_items)

            # Emit signals
            self._emit_get_signals()

            logger.debug(f"Successfully retrieved item: {self._get_item_uuid(item)}")
            return item

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get item from queue: {e}")
            raise

    # ASYNC INTERFACE (for workers)
    async def async_put(self, item):
        """Pure async put with priority ordering"""
        try:
            # Add to priority storage
            with self._lock:
                heapq.heappush(self._priority_items, item)

            # Notify via janus async queue
            await self.async_q.put(True)

            # Emit signals
            self._emit_put_signals(item)

            logger.debug(f"Successfully async queued item: {self._get_item_uuid(item)}")
            return True

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to async put item {self._get_item_uuid(item)}: {e}")
            # Remove from priority storage if janus put failed
            try:
                with self._lock:
                    if item in self._priority_items:
                        self._priority_items.remove(item)
                        heapq.heapify(self._priority_items)
            except Exception as cleanup_e:
                logger.critical(f"CRITICAL: Failed to cleanup after async put failure: {cleanup_e}")
            return False

    async def async_get(self):
        """Pure async get with priority ordering"""
        try:
            # Wait for notification
            await self.async_q.get()

            # Get highest priority item
            with self._lock:
                if not self._priority_items:
                    logger.critical("CRITICAL: Async queue notification received but no priority items available")
                    raise Exception("Priority queue inconsistency")
                item = heapq.heappop(self._priority_items)

            # Emit signals
            self._emit_get_signals()

            logger.debug(f"Successfully async retrieved item: {self._get_item_uuid(item)}")
            return item

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to async get item from queue: {e}")
            raise

    # UTILITY METHODS
    def qsize(self) -> int:
        """Get current queue size"""
        try:
            with self._lock:
                return len(self._priority_items)
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get queue size: {e}")
            return 0

    def empty(self) -> bool:
        """Check if queue is empty"""
        return self.qsize() == 0

    def close(self):
        """Close the janus queue"""
        try:
            self._janus_queue.close()
            logger.debug("RecheckPriorityQueue closed successfully")
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to close RecheckPriorityQueue: {e}")

    # COMPATIBILITY METHODS (from original implementation)
    @property
    def queue(self):
        """Provide compatibility with original queue access"""
        try:
            with self._lock:
                return list(self._priority_items)
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get queue list: {e}")
            return []

    def get_uuid_position(self, target_uuid: str) -> Dict[str, Any]:
        """Find position of UUID in queue"""
        try:
            with self._lock:
                queue_list = list(self._priority_items)
                total_items = len(queue_list)

                if total_items == 0:
                    return {'position': None, 'total_items': 0, 'priority': None, 'found': False}

                # Find target item
                for item in queue_list:
                    if (hasattr(item, 'item') and isinstance(item.item, dict) and
                            item.item.get('uuid') == target_uuid):

                        # Count items with higher priority
                        position = sum(1 for other in queue_list if other.priority < item.priority)
                        return {
                            'position': position,
                            'total_items': total_items,
                            'priority': item.priority,
                            'found': True
                        }

                return {'position': None, 'total_items': total_items, 'priority': None, 'found': False}

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get UUID position for {target_uuid}: {e}")
            return {'position': None, 'total_items': 0, 'priority': None, 'found': False}

    def get_all_queued_uuids(self, limit: Optional[int] = None, offset: int = 0) -> Dict[str, Any]:
        """Get all queued UUIDs with pagination"""
        try:
            with self._lock:
                queue_list = sorted(self._priority_items)  # Sort by priority
                total_items = len(queue_list)

                if total_items == 0:
                    return {'items': [], 'total_items': 0, 'returned_items': 0, 'has_more': False}

                # Apply pagination
                end_idx = min(offset + limit, total_items) if limit else total_items
                items_to_process = queue_list[offset:end_idx]

                result = []
                for position, item in enumerate(items_to_process, start=offset):
                    if (hasattr(item, 'item') and isinstance(item.item, dict) and
                            'uuid' in item.item):
                        result.append({
                            'uuid': item.item['uuid'],
                            'position': position,
                            'priority': item.priority
                        })

                return {
                    'items': result,
                    'total_items': total_items,
                    'returned_items': len(result),
                    'has_more': (offset + len(result)) < total_items
                }

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get all queued UUIDs: {e}")
            return {'items': [], 'total_items': 0, 'returned_items': 0, 'has_more': False}

    def get_queue_summary(self) -> Dict[str, Any]:
        """Get queue summary statistics"""
        try:
            with self._lock:
                queue_list = list(self._priority_items)
                total_items = len(queue_list)

                if total_items == 0:
                    return {
                        'total_items': 0, 'priority_breakdown': {},
                        'immediate_items': 0, 'clone_items': 0, 'scheduled_items': 0
                    }

                immediate_items = clone_items = scheduled_items = 0
                priority_counts = {}

                for item in queue_list:
                    priority = item.priority
                    priority_counts[priority] = priority_counts.get(priority, 0) + 1

                    if priority == 1:
                        immediate_items += 1
                    elif priority == 5:
                        clone_items += 1
                    elif priority > 100:
                        scheduled_items += 1

                return {
                    'total_items': total_items,
                    'priority_breakdown': priority_counts,
                    'immediate_items': immediate_items,
                    'clone_items': clone_items,
                    'scheduled_items': scheduled_items,
                    'min_priority': min(priority_counts.keys()) if priority_counts else None,
                    'max_priority': max(priority_counts.keys()) if priority_counts else None
                }

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get queue summary: {e}")
            return {'total_items': 0, 'priority_breakdown': {}, 'immediate_items': 0,
                    'clone_items': 0, 'scheduled_items': 0}

    # PRIVATE METHODS
    def _get_item_uuid(self, item) -> str:
        """Safely extract UUID from item for logging"""
        try:
            if hasattr(item, 'item') and isinstance(item.item, dict):
                return item.item.get('uuid', 'unknown')
        except Exception:
            pass
        return 'unknown'

    def _emit_put_signals(self, item):
        """Emit signals when item is added"""
        try:
            # Watch update signal
            if hasattr(item, 'item') and isinstance(item.item, dict) and 'uuid' in item.item:
                watch_check_update = signal('watch_check_update')
                if watch_check_update:
                    watch_check_update.send(watch_uuid=item.item['uuid'])

            # Queue length signal
            if self.queue_length_signal:
                self.queue_length_signal.send(length=self.qsize())

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to emit put signals: {e}")

    def _emit_get_signals(self):
        """Emit signals when item is removed"""
        try:
            if self.queue_length_signal:
                self.queue_length_signal.send(length=self.qsize())
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to emit get signals: {e}")


class NotificationQueue:
    """
    Ultra-reliable notification queue using pure janus.

    CRITICAL DESIGN NOTE: Both sync_q and async_q are required because:
    - sync_q: Used by Flask routes, ticker threads, and other synchronous code
    - async_q: Used by async workers and coroutines

    DO NOT REMOVE EITHER INTERFACE - they bridge different execution contexts.
    See RecheckPriorityQueue docstring above for detailed explanation.

    Simple wrapper around janus with bulletproof error handling.
    """

    def __init__(self, maxsize: int = 0):
        try:
            self._janus_queue = janus.Queue(maxsize=maxsize)
            # BOTH interfaces required - see class docstring for why
            self.sync_q = self._janus_queue.sync_q   # Flask routes, threads
            self.async_q = self._janus_queue.async_q  # Async workers
            self.notification_event_signal = signal('notification_event')
            logger.debug("NotificationQueue initialized successfully")
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to initialize NotificationQueue: {e}")
            raise

    def put(self, item: Dict[str, Any], block: bool = True, timeout: Optional[float] = None):
        """Thread-safe sync put with signal emission"""
        try:
            self.sync_q.put(item, block=block, timeout=timeout)
            self._emit_notification_signal(item)
            logger.debug(f"Successfully queued notification: {item.get('uuid', 'unknown')}")
            return True
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to put notification {item.get('uuid', 'unknown')}: {e}")
            return False

    async def async_put(self, item: Dict[str, Any]):
        """Pure async put with signal emission"""
        try:
            await self.async_q.put(item)
            self._emit_notification_signal(item)
            logger.debug(f"Successfully async queued notification: {item.get('uuid', 'unknown')}")
            return True
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to async put notification {item.get('uuid', 'unknown')}: {e}")
            return False

    def get(self, block: bool = True, timeout: Optional[float] = None):
        """Thread-safe sync get"""
        try:
            return self.sync_q.get(block=block, timeout=timeout)
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get notification: {e}")
            raise

    async def async_get(self):
        """Pure async get"""
        try:
            return await self.async_q.get()
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to async get notification: {e}")
            raise

    def qsize(self) -> int:
        """Get current queue size"""
        try:
            return self.sync_q.qsize()
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get notification queue size: {e}")
            return 0

    def empty(self) -> bool:
        """Check if queue is empty"""
        return self.qsize() == 0

    def close(self):
        """Close the janus queue"""
        try:
            self._janus_queue.close()
            logger.debug("NotificationQueue closed successfully")
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to close NotificationQueue: {e}")

    def _emit_notification_signal(self, item: Dict[str, Any]):
        """Emit notification signal"""
        try:
            if self.notification_event_signal and isinstance(item, dict):
                watch_uuid = item.get('uuid')
                if watch_uuid:
                    self.notification_event_signal.send(watch_uuid=watch_uuid)
                else:
                    self.notification_event_signal.send()
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to emit notification signal: {e}")
changedetectionio/static/images/open-ai-logo.png (new binary file, 15 KiB; not shown)
@@ -21,6 +21,7 @@ function request_textpreview_update() {
        namespace: 'watchEdit'
    }).done(function (data) {
        console.debug(data['duration'])
        $('#error-text').text(data['duration']);
        $('#filters-and-triggers #text-preview-before-inner').text(data['before_filter']);
        $('#filters-and-triggers #text-preview-inner')
            .text(data['after_filter'])
@@ -37,9 +38,8 @@ function request_textpreview_update() {
    }).fail(function (error) {
        if (error.statusText === 'abort') {
            console.log('Request was aborted due to a new request being fired.');
        } else {
            $('#filters-and-triggers #text-preview-inner').text('There was an error communicating with the server.');
        }
        $('#error-text').text(error.responseJSON['error']);
    })
}
@@ -1,3 +1,5 @@
@use "_llm-prompt";

ul#conditions_match_logic {
    list-style: none;
    input, label, li {
@@ -6,19 +6,19 @@
        }
    }
}

tr {
    /* make the icons and the text inline-ish */
    td.inline.title-col {
        .flex-wrapper {
            display: flex;
            align-items: center;
            gap: 4px;
&.favicon-enabled {
    tr {
        /* make the icons and the text inline-ish */
        td.inline.title-col {
            .flex-wrapper {
                display: flex;
                align-items: center;
                gap: 4px;
            }
        }
    }
}


td,
th {
    vertical-align: middle;
changedetectionio/static/styles/scss/parts/_llm-prompt.scss (new file, 59 lines)
@@ -0,0 +1,59 @@
#form-quick-watch-add #openai-prompt {
    color: var(--color-white);
}


#llm-prompt-all {
    .label {
        display: block !important;
    }

    textarea {
        white-space: pre-wrap;
        overflow-wrap: break-word;
        word-wrap: break-word; /* legacy support */
        font-size: 13px;
    }
    ul {
        list-style: none;
        padding-left: 0px;

        li {
            display: flex;
            align-items: center;
            gap: 0.5em;
            padding-bottom: 0.3em;

            > * {
                margin: 0px;
                padding: 0px;
            }

            label {
                font-weight: normal;
            }
        }
    }
}

@media (min-width: 768px) {
    #llm-prompt-all {
        display: grid;
        grid-template-columns: 1fr auto auto;
        column-gap: 1.5rem;
        align-items: start;

        font-size: 0.9rem;
        padding: 0.3rem;

        #llm-prompt {
            /* ensure the textarea stretches horizontally */
            width: 100%;

            textarea {
                width: 100%;
                box-sizing: border-box;
            }
        }
    }
}
@@ -792,7 +792,9 @@ textarea::placeholder {
    border-top-left-radius: 5px;
    border-top-right-radius: 5px;
    background-color: var(--color-background-tab);

    svg {
      stroke: var(--color-text-tab);
    }
    &:not(.active) {
      &:hover {
        background-color: var(--color-background-tab-hover);
@@ -802,11 +804,13 @@ textarea::placeholder {
    &.active,
    :target {
      background-color: var(--color-background);

      a {
        color: var(--color-text-tab-active);
        font-weight: bold;
      }
      svg {
        stroke: var(--color-text-tab-active);
      }
    }

    a {
File diff suppressed because one or more lines are too long
@@ -38,6 +38,12 @@
        <script src="{{url_for('static_content', group='js', filename='socket.io.min.js')}}"></script>
        <script src="{{url_for('static_content', group='js', filename='realtime.js')}}" defer></script>
        {% endif %}
        <script>
            // Initialize Feather icons after the page loads
            document.addEventListener('DOMContentLoaded', function() {
                feather.replace();
            });
        </script>
    </head>

    <body class="">
@@ -1,6 +1,6 @@
{% extends 'base.html' %}
{% block content %}
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_playwright_type_watches_warning, render_conditions_fieldlist_of_formfields_as_table %}
{% from '_helpers.html' import render_field, render_simple_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_playwright_type_watches_warning, render_conditions_fieldlist_of_formfields_as_table %}
{% from '_common_fields.html' import render_common_settings_form %}
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script>
@@ -52,7 +52,7 @@
            <!-- should goto extra forms? -->
            {% if watch['processor'] == 'text_json_diff' %}
            <li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li>
            <li class="tab" id="filters-and-triggers-tab"><a href="#filters-and-triggers">Filters & Triggers</a></li>
            <li class="tab" id="filters-and-triggers-tab"><a href="#filters-and-triggers">AI, Filters & Triggers</a></li>
            <li class="tab" id="conditions-tab"><a href="#conditions">Conditions</a></li>
            {% endif %}
            <li class="tab"><a href="#notifications">Notifications</a></li>
@@ -316,7 +316,6 @@ Math: {{ 1 + 1 }}") }}
                </li>
            </ul>
        </div>

        {% include "edit/include_subtract.html" %}
        <div class="text-filtering border-fieldset">
            <fieldset class="pure-group" id="text-filtering-type-options">
@@ -364,6 +363,7 @@ Math: {{ 1 + 1 }}") }}
                </div>
            </div>
        </div>
        <p id="error-text"></p>
    </div>
    </div>
    </div>
@@ -1,4 +1,7 @@
<div class="pure-control-group">
<div class="pure-control-group">
    {%- include 'edit/llm_prompt.html' -%}
</div>
<div class="pure-control-group">
    {% set field = render_field(form.include_filters,
        rows=5,
        placeholder=has_tag_filters_extra+"#example
changedetectionio/templates/edit/llm_prompt.html (new file, 12 lines)
@@ -0,0 +1,12 @@
<div class="pure-control-group" id="ai-filter-options">
    <div id="openai-prompt">
        <div id="llm-prompt-all">
            <div id="llm-prompt">
                {{ render_simple_field(form.LLM_prompt, rows=5) }}
            </div>
            <div id="llm-send-type">
                {{ render_simple_field(form.LLM_send_type) }}
            </div>
        </div>
    </div>
</div>
@@ -292,7 +292,9 @@ def test_access_denied(client, live_server, measure_memory_usage):

def test_api_watch_PUT_update(client, live_server, measure_memory_usage):


    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')

    # Create a watch
    set_original_response()
    test_url = url_for('test_endpoint', _external=True)
@@ -300,27 +302,14 @@ def test_api_watch_PUT_update(client, live_server, measure_memory_usage):
    # Create new
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({"url": test_url,
                         'tag': "One, Two",
                         "title": "My test URL",
                         'headers': {'cookie': 'yum'},
                         "conditions": [
                             {
                                 "field": "page_filtered_text",
                                 "operator": "contains_regex",
                                 "value": "."  # contains anything
                             }
                         ],
                         "conditions_match_logic": "ALL"
                         }
                        ),
        data=json.dumps({"url": test_url, 'tag': "One, Two", "title": "My test URL", 'headers': {'cookie': 'yum'} }),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
        follow_redirects=True
    )

    assert res.status_code == 201

    wait_for_all_checks(client)

    # Get a listing, it will be the first one
    res = client.get(
        url_for("createwatch"),
@@ -4,8 +4,6 @@ import time

from flask import url_for
from .util import live_server_setup, wait_for_all_checks
from ..model import CONDITIONS_MATCH_LOGIC_DEFAULT


def set_original_response(number="50"):
    test_return_data = f"""<html>
@@ -78,7 +76,7 @@ def test_conditions_with_text_and_number(client, live_server):
            "fetch_backend": "html_requests",
            "include_filters": ".number-container",
            "title": "Number AND Text Condition Test",
            "conditions_match_logic": CONDITIONS_MATCH_LOGIC_DEFAULT, # ALL = AND logic
            "conditions_match_logic": "ALL", # ALL = AND logic
            "conditions-0-operator": "in",
            "conditions-0-field": "page_filtered_text",
            "conditions-0-value": "5",
@@ -285,7 +283,7 @@ def test_lev_conditions_plugin(client, live_server, measure_memory_usage):
        data={
            "url": test_url,
            "fetch_backend": "html_requests",
            "conditions_match_logic": CONDITIONS_MATCH_LOGIC_DEFAULT, # ALL = AND logic
            "conditions_match_logic": "ALL", # ALL = AND logic
            "conditions-0-field": "levenshtein_ratio",
            "conditions-0-operator": "<",
            "conditions-0-value": "0.8" # needs to be more of a diff to trigger a change
@@ -46,7 +46,7 @@ def test_check_extract_text_from_diff(client, live_server, measure_memory_usage)
        follow_redirects=False
    )

    assert b'No matches found while scanning all of the watch history for that RegEx.' not in res.data
    assert b'Nothing matches that RegEx' not in res.data
    assert res.content_type == 'text/csv'

    # Read the csv reply as stringio
@@ -1,5 +1,4 @@
from changedetectionio.conditions import execute_ruleset_against_all_plugins
from changedetectionio.model import CONDITIONS_MATCH_LOGIC_DEFAULT
from changedetectionio.store import ChangeDetectionStore
import shutil
import tempfile
@@ -60,7 +59,7 @@ class TestTriggerConditions(unittest.TestCase):

        self.store.data['watching'][self.watch_uuid].update(
            {
                "conditions_match_logic": CONDITIONS_MATCH_LOGIC_DEFAULT,
                "conditions_match_logic": "ALL",
                "conditions": [
                    {"operator": ">=", "field": "extracted_number", "value": "10"},
                    {"operator": "<=", "field": "extracted_number", "value": "5000"},
@@ -188,54 +188,15 @@ def is_watch_running(watch_uuid):


def queue_item_async_safe(update_q, item):
    """Bulletproof queue operation with comprehensive error handling"""
    item_uuid = 'unknown'

    try:
        # Safely extract UUID for logging
        if hasattr(item, 'item') and isinstance(item.item, dict):
            item_uuid = item.item.get('uuid', 'unknown')
    except Exception as uuid_e:
        logger.critical(f"CRITICAL: Failed to extract UUID from queue item: {uuid_e}")

    # Validate inputs
    if not update_q:
        logger.critical(f"CRITICAL: Queue is None/invalid for item {item_uuid}")
        return False

    if not item:
        logger.critical(f"CRITICAL: Item is None/invalid")
        return False

    # Attempt queue operation with multiple fallbacks
    try:
        # Primary: Use sync interface (thread-safe)
        success = update_q.put(item, block=True, timeout=5.0)
        if success is False:  # Explicit False return means failure
            logger.critical(f"CRITICAL: Queue.put() returned False for item {item_uuid}")
            return False

        logger.debug(f"Successfully queued item: {item_uuid}")
        return True

    except Exception as e:
        logger.critical(f"CRITICAL: Exception during queue operation for item {item_uuid}: {type(e).__name__}: {e}")

        # Secondary: Attempt queue health check
    """Queue an item for async queue processing"""
    if async_loop and not async_loop.is_closed():
        try:
            queue_size = update_q.qsize()
            is_empty = update_q.empty()
            logger.critical(f"CRITICAL: Queue health - size: {queue_size}, empty: {is_empty}")
        except Exception as health_e:
            logger.critical(f"CRITICAL: Queue health check failed: {health_e}")

        # Log queue type for debugging
        try:
            logger.critical(f"CRITICAL: Queue type: {type(update_q)}, has sync_q: {hasattr(update_q, 'sync_q')}")
        except Exception:
            logger.critical(f"CRITICAL: Cannot determine queue type")

        return False
            # For async queue, schedule the put operation
            asyncio.run_coroutine_threadsafe(update_q.put(item), async_loop)
        except RuntimeError as e:
            logger.error(f"Failed to queue item: {e}")
    else:
        logger.error("Async loop not available or closed for queueing item")


def shutdown_workers():
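For reference, a small runnable sketch of the asyncio.run_coroutine_threadsafe() pattern the replacement queue_item_async_safe() relies on, i.e. a plain thread handing an item to a queue owned by a worker event loop (all names here are illustrative):

```python
import asyncio
import threading

loop_ready = threading.Event()
async_loop = asyncio.new_event_loop()
update_q = None  # created inside the worker loop

async def worker():
    global update_q
    update_q = asyncio.PriorityQueue()
    loop_ready.set()                        # loop is running and the queue exists
    priority, uuid = await update_q.get()   # async worker side
    print("worker received", uuid, "at priority", priority)

t = threading.Thread(target=lambda: async_loop.run_until_complete(worker()))
t.start()

# Synchronous side (e.g. a ticker thread or Flask route)
loop_ready.wait()
if async_loop and not async_loop.is_closed():
    asyncio.run_coroutine_threadsafe(update_q.put((1, "some-watch-uuid")), async_loop)
t.join()
```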
@@ -66,9 +66,6 @@ services:
    # A valid timezone name to run as (for scheduling watch checking) see https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
    # - TZ=America/Los_Angeles
    #
    # Text processing locale, en_US.UTF-8 used by default unless defined as something else here, UTF-8 should cover 99.99% of cases.
    # - LC_ALL=en_US.UTF-8
    #
    # Maximum height of screenshots, default is 16000 px, screenshots will be clipped to this if exceeded.
    # RAM usage will be higher if you increase this.
    # - SCREENSHOT_MAX_HEIGHT=16000
@@ -7,7 +7,6 @@ flask-paginate
flask_expects_json~=1.7
flask_restful
flask_cors # For the Chrome extension to operate
janus # Thread-safe async/sync queue bridge
flask_wtf~=1.2
flask~=2.3
flask-socketio~=5.5.1
@@ -70,6 +69,9 @@ werkzeug==3.0.6
# Templating, so far just in the URLs but in the future can be for the notifications also
jinja2~=3.1
jinja2-time
langchain~=0.3
langchain-openai~=0.3

openpyxl
# https://peps.python.org/pep-0508/#environment-markers
# https://github.com/dgtlmoon/changedetection.io/pull/1009