mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-04-30 14:50:39 +00:00
e9e8c8d218
Build and push containers / metadata (push) Has been cancelled
Build and push containers / build-push-containers (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/amd64 (alpine) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/arm64 (alpine) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/amd64 (main) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/arm/v7 (main) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/arm/v8 (main) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/arm64 (main) (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
ChangeDetection.io App Test / lint-translations (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-14 (push) Has been cancelled
85 lines
2.7 KiB
Python
85 lines
2.7 KiB
Python
"""
|
|
Parse and validate LLM JSON responses.
|
|
Pure functions — no side effects, fully testable.
|
|
|
|
LLMs occasionally return JSON wrapped in markdown fences or with trailing
|
|
text. This module handles those cases gracefully.
|
|
"""
|
|
|
|
import json
|
|
import re
|
|
|
|
# Selectors that address an element by its position (CSS nth-child /
# nth-of-type / :eq(), or a numeric XPath index such as [3] or //*[1]) are
# fragile, so they are rejected even if the LLM generates them.
# Matching is case-insensitive.
_POSITIONAL_SELECTOR_RE = re.compile(
    r'nth-child|nth-of-type|:eq\(|\[\d+\]|\/\/\*\[\d',
    flags=re.IGNORECASE,
)
|
|
|
|
|
|
def _extract_json(raw: str) -> str:
    """Strip markdown fences and extract the first JSON object.

    The first ``{`` from which a complete JSON value can be decoded wins, so
    leading or trailing commentary -- even commentary that itself contains
    braces -- is not included in the result.

    Args:
        raw: Raw text returned by the LLM.

    Returns:
        The text of the first decodable JSON object. If nothing decodes, the
        span from the first ``{`` to the last ``}`` is returned, or the
        fence-stripped input when there is no such span; the caller's
        ``json.loads`` is then expected to report the error.
    """
    raw = raw.strip()
    # Remove ```json ... ``` or ``` ... ``` fences
    raw = re.sub(r'^```(?:json)?\s*', '', raw, flags=re.MULTILINE)
    raw = re.sub(r'\s*```$', '', raw, flags=re.MULTILINE)

    # A greedy first-'{'-to-last-'}' match would swallow trailing text such as
    # '{"a": 1} (see {docs})', so let the JSON decoder decide where the object
    # ends, trying each '{' in turn until one decodes.
    decoder = json.JSONDecoder()
    start = raw.find('{')
    while start != -1:
        try:
            _, end = decoder.raw_decode(raw, start)
        except json.JSONDecodeError:
            start = raw.find('{', start + 1)
        else:
            return raw[start:end]

    # Nothing decoded cleanly: keep the previous best-effort span.
    match = re.search(r'\{.*\}', raw, re.DOTALL)
    return match.group(0) if match else raw
|
|
|
|
|
|
def parse_eval_response(raw: str) -> dict:
    """
    Parse a diff evaluation response.

    Returns {'important': bool, 'summary': str}.
    Falls back to important=False on any parse error, including input that
    is not a str or a response that is not a JSON object.

    A string flag such as "false", "no" or "0" counts as False, and a JSON
    null summary becomes '' rather than the literal text 'None'.
    """
    try:
        data = json.loads(_extract_json(raw))
        important = data.get('important', False)
        summary = data.get('summary', '')
    except (json.JSONDecodeError, AttributeError, TypeError):
        # AttributeError: raw has no .strip() or the JSON value is not an object.
        # TypeError: raw is a non-str type such as bytes.
        return {'important': False, 'summary': ''}

    # bool("false") is True, so a flag sent as a string needs explicit handling.
    if isinstance(important, str):
        important = important.strip().lower() not in (
            '', 'false', 'no', '0', 'null', 'none',
        )

    return {
        'important': bool(important),
        # str(None) would yield the text 'None'; treat null as "no summary".
        'summary': '' if summary is None else str(summary).strip(),
    }
|
|
|
|
|
|
def parse_preview_response(raw: str) -> dict:
    """
    Parse a live-preview extraction response.

    Returns {'found': bool, 'answer': str}.
    Falls back to found=False on any parse error, including input that is
    not a str or a response that is not a JSON object.

    A string flag such as "false", "no" or "0" counts as False, and a JSON
    null answer becomes '' rather than the literal text 'None'.
    """
    try:
        data = json.loads(_extract_json(raw))
        found = data.get('found', False)
        answer = data.get('answer', '')
    except (json.JSONDecodeError, AttributeError, TypeError):
        # AttributeError: raw has no .strip() or the JSON value is not an object.
        # TypeError: raw is a non-str type such as bytes.
        return {'found': False, 'answer': ''}

    # bool("false") is True, so a flag sent as a string needs explicit handling.
    if isinstance(found, str):
        found = found.strip().lower() not in (
            '', 'false', 'no', '0', 'null', 'none',
        )

    return {
        'found': bool(found),
        # str(None) would yield the text 'None'; treat null as "no answer".
        'answer': '' if answer is None else str(answer).strip(),
    }
|
|
|
|
|
|
def parse_setup_response(raw: str) -> dict:
    """
    Parse a setup/pre-filter decision response.

    Returns {'needs_prefilter': bool, 'selector': str|None, 'reason': str}.
    Rejects positional selectors even if the LLM generates them; a selector
    that is not a string is rejected the same way instead of raising.
    Falls back to needs_prefilter=False on any parse error, including input
    that is not a str or a response that is not a JSON object.

    A string flag such as "false", "no" or "0" counts as False, and a JSON
    null reason becomes '' rather than the literal text 'None'.
    """
    try:
        data = json.loads(_extract_json(raw))
        needs = data.get('needs_prefilter', False)
        selector = data.get('selector') or None
        reason = data.get('reason', '')
    except (json.JSONDecodeError, AttributeError, TypeError):
        # AttributeError: raw has no .strip() or the JSON value is not an object.
        # TypeError: raw is a non-str type such as bytes.
        return {'needs_prefilter': False, 'selector': None, 'reason': ''}

    # bool("false") is True, so a flag sent as a string needs explicit handling.
    if isinstance(needs, str):
        needs = needs.strip().lower() not in (
            '', 'false', 'no', '0', 'null', 'none',
        )
    needs = bool(needs)

    # Sanitise: reject positional selectors, and anything that is not a string
    # (the regex search would raise TypeError on a list, number or object).
    if selector is not None:
        if not isinstance(selector, str) or _POSITIONAL_SELECTOR_RE.search(selector):
            selector = None
            needs = False

    return {
        'needs_prefilter': needs,
        'selector': selector if needs else None,
        # str(None) would yield the text 'None'; treat null as "no reason".
        'reason': '' if reason is None else str(reason).strip(),
    }
|