Compare commits

...

8 Commits

Author SHA1 Message Date
dgtlmoon
222e89a7a1 tweak 2025-10-09 18:03:02 +02:00
dgtlmoon
a64f47c2fe small fix 2025-10-09 18:01:54 +02:00
dgtlmoon
e9905c2f64 memory improvements 2025-10-09 17:55:09 +02:00
dgtlmoon
6ad4acc9fc 0.50.18
Some checks failed
Build and push containers / metadata (push) Has been cancelled
Build and push containers / build-push-containers (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built 📦 package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/amd64 (alpine) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/arm64 (alpine) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/amd64 (main) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/arm/v7 (main) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/arm/v8 (main) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/arm64 (main) (push) Has been cancelled
2025-10-09 12:57:10 +02:00
dgtlmoon
3e59521f48 Always follow plaintext header over the actual content type if its available (#3473) #3472 2025-10-09 12:56:32 +02:00
dependabot[bot]
0970c087c8 Bump github/codeql-action from 3 to 4 in the all group (#3468) 2025-10-09 10:47:12 +02:00
dependabot[bot]
676c550e6e (Realtime updates) Update python-engineio requirement from ~=4.12.0 to ~=4.12.3 (#3467)
Some checks failed
Build and push containers / metadata (push) Has been cancelled
Build and push containers / build-push-containers (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built 📦 package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/amd64 (alpine) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/arm64 (alpine) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/amd64 (main) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/arm/v7 (main) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/arm/v8 (main) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/arm64 (main) (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
CodeQL / Analyze (javascript) (push) Has been cancelled
CodeQL / Analyze (python) (push) Has been cancelled
2025-10-09 01:15:36 +02:00
dependabot[bot]
78fa47f6f8 Bump psutil from 7.0.0 to 7.1.0 (#3469) 2025-10-09 00:50:53 +02:00
7 changed files with 63 additions and 39 deletions

View File

@@ -34,7 +34,7 @@ jobs:
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@v3
uses: github/codeql-action/init@v4
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
@@ -45,7 +45,7 @@ jobs:
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
uses: github/codeql-action/autobuild@v3
uses: github/codeql-action/autobuild@v4
# Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl
@@ -59,4 +59,4 @@ jobs:
# make release
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v3
uses: github/codeql-action/analyze@v4

View File

@@ -2,7 +2,7 @@
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
__version__ = '0.50.17'
__version__ = '0.50.18'
from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError

View File

@@ -1,10 +1,7 @@
import copy
import yaml
import functools
from flask import request, abort
from loguru import logger
from openapi_core import OpenAPI
from openapi_core.contrib.flask import FlaskOpenAPIRequest
from . import api_schema
from ..model import watch_base
@@ -34,7 +31,11 @@ schema_delete_notification_urls['required'] = ['notification_urls']
@functools.cache
def get_openapi_spec():
"""Lazy load OpenAPI spec and dependencies only when validation is needed."""
import os
import yaml # Lazy import - only loaded when API validation is actually used
from openapi_core import OpenAPI # Lazy import - saves ~10.7 MB on startup
spec_path = os.path.join(os.path.dirname(__file__), '../../docs/api-spec.yaml')
with open(spec_path, 'r') as f:
spec_dict = yaml.safe_load(f)
@@ -49,6 +50,9 @@ def validate_openapi_request(operation_id):
try:
# Skip OpenAPI validation for GET requests since they don't have request bodies
if request.method.upper() != 'GET':
# Lazy import - only loaded when actually validating a request
from openapi_core.contrib.flask import FlaskOpenAPIRequest
spec = get_openapi_spec()
openapi_request = FlaskOpenAPIRequest(request)
result = spec.unmarshal_request(openapi_request)

View File

@@ -20,8 +20,6 @@ Used by: processors/text_json_diff/processor.py and other content processors
RSS_XML_CONTENT_TYPES = [
"application/rss+xml",
"application/rdf+xml",
"text/xml",
"application/xml",
"application/atom+xml",
"text/rss+xml", # rare, non-standard
"application/x-rss+xml", # legacy (older feed software)
@@ -37,11 +35,6 @@ JSON_CONTENT_TYPES = [
"application/vnd.api+json",
]
# CSV Content-types
CSV_CONTENT_TYPES = [
"text/csv",
"application/csv",
]
# Generic XML Content-types (non-RSS/Atom)
XML_CONTENT_TYPES = [
@@ -49,21 +42,10 @@ XML_CONTENT_TYPES = [
"application/xml",
]
# YAML Content-types
YAML_CONTENT_TYPES = [
"text/yaml",
"text/x-yaml",
"application/yaml",
"application/x-yaml",
]
HTML_PATTERNS = ['<!doctype html', '<html', '<head', '<body', '<script', '<iframe', '<div']
import re
import magic
from loguru import logger
class guess_stream_type():
is_pdf = False
is_json = False
@@ -75,7 +57,7 @@ class guess_stream_type():
is_yaml = False
def __init__(self, http_content_header, content):
import re
magic_content_header = http_content_header
test_content = content[:200].lower().strip()
@@ -85,6 +67,8 @@ class guess_stream_type():
# Magic will sometimes call text/plain as text/html!
magic_result = None
try:
import magic
mime = magic.from_buffer(content[:200], mime=True) # Send the original content
logger.debug(f"Guessing mime type, original content_type '{http_content_header}', mime type detected '{mime}'")
if mime and "/" in mime:
@@ -104,18 +88,16 @@ class guess_stream_type():
has_html_patterns = any(p in test_content_normalized for p in HTML_PATTERNS)
# Always trust headers first
if any(s in http_content_header for s in RSS_XML_CONTENT_TYPES) or any(s in magic_content_header for s in RSS_XML_CONTENT_TYPES):
if 'text/plain' in http_content_header:
self.is_plaintext = True
if any(s in http_content_header for s in RSS_XML_CONTENT_TYPES):
self.is_rss = True
elif any(s in http_content_header for s in JSON_CONTENT_TYPES) or any(s in magic_content_header for s in JSON_CONTENT_TYPES):
elif any(s in http_content_header for s in JSON_CONTENT_TYPES):
self.is_json = True
elif any(s in http_content_header for s in CSV_CONTENT_TYPES) or any(s in magic_content_header for s in CSV_CONTENT_TYPES):
self.is_csv = True
elif any(s in http_content_header for s in XML_CONTENT_TYPES) or any(s in magic_content_header for s in XML_CONTENT_TYPES):
elif any(s in http_content_header for s in XML_CONTENT_TYPES):
# Only mark as generic XML if not already detected as RSS
if not self.is_rss:
self.is_xml = True
elif any(s in http_content_header for s in YAML_CONTENT_TYPES) or any(s in magic_content_header for s in YAML_CONTENT_TYPES):
self.is_yaml = True
elif 'pdf' in magic_content_header:
self.is_pdf = True
###
@@ -125,13 +107,18 @@ class guess_stream_type():
elif magic_result == 'text/plain':
self.is_plaintext = True
logger.debug(f"Trusting magic's text/plain result (no HTML patterns detected)")
elif '<rss' in test_content_normalized or '<feed' in test_content_normalized:
elif any(s in magic_content_header for s in JSON_CONTENT_TYPES):
self.is_json = True
# magic will call a rss document 'xml'
elif '<rss' in test_content_normalized or '<feed' in test_content_normalized or any(s in magic_content_header for s in RSS_XML_CONTENT_TYPES):
self.is_rss = True
elif test_content_normalized.startswith('<?xml'):
elif test_content_normalized.startswith('<?xml') or any(s in magic_content_header for s in XML_CONTENT_TYPES):
# Generic XML that's not RSS/Atom (RSS/Atom checked above)
self.is_xml = True
elif '%pdf-1' in test_content:
self.is_pdf = True
elif http_content_header.startswith('text/'):
self.is_plaintext = True
# Only trust magic for 'text' if no other patterns matched
elif 'text' in magic_content_header:
self.is_plaintext = True

View File

@@ -295,3 +295,36 @@ got it\r\n
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
# Server says it's plaintext, so we should always treat it as plaintext
def test_plaintext_even_if_xml_content(client, live_server, measure_memory_usage):
    """Check that a ``text/plain`` Content-Type wins over XML-looking content.

    An XML document is written to the endpoint content file and the watch is
    pointed at ``test_endpoint`` with ``content_type="text/plain"``
    (presumably that endpoint serves the file just written - confirm against
    the test server). The preview page must then show the markup
    HTML-escaped, i.e. handled as plain text.
    """
    xml_payload = """<?xml version="1.0" encoding="utf-8"?>
<resources xmlns:tools="http://schemas.android.com/tools">
<!--Activity and fragment titles-->
<string name="feed_update_receiver_name">Abonnementen bijwerken</string>
</resources>
"""
    with open("test-datastore/endpoint-content.txt", "w") as content_file:
        content_file.write(xml_payload)

    # The URL asks the endpoint to answer with a text/plain Content-Type
    plaintext_url = url_for('test_endpoint', content_type="text/plain", _external=True)

    # Register the URL through the import page
    import_response = client.post(
        url_for("imports.import_page"),
        data={"urls": plaintext_url},
        follow_redirects=True,
    )
    assert b"1 Imported" in import_response.data

    wait_for_all_checks(client)

    preview_response = client.get(
        url_for("ui.ui_views.preview_page", uuid="first"),
        follow_redirects=True,
    )
    # The <string ...> tag must appear in the preview in its HTML-escaped form
    assert b'&lt;string name=&#34;feed_update_receiver_name&#34;' in preview_response.data

    # Final request to the delete form with uuid="all"; response is not inspected
    client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)

View File

@@ -111,7 +111,7 @@ def test_basic_cdata_rss_markup(client, live_server, measure_memory_usage):
set_original_cdata_xml()
test_url = url_for('test_endpoint', content_type="application/xml", _external=True)
test_url = url_for('test_endpoint', content_type="application/atom+xml; charset=UTF-8", _external=True)
# Add our URL to the import page
res = client.post(
@@ -139,7 +139,7 @@ def test_rss_xpath_filtering(client, live_server, measure_memory_usage):
set_original_cdata_xml()
test_url = url_for('test_endpoint', content_type="application/xml", _external=True)
test_url = url_for('test_endpoint', content_type="application/atom+xml; charset=UTF-8", _external=True)
res = client.post(
url_for("ui.ui_views.form_quick_watch_add"),

View File

@@ -12,7 +12,7 @@ flask_wtf~=1.2
flask~=2.3
flask-socketio~=5.5.1
python-socketio~=5.13.0
python-engineio~=4.12.0
python-engineio~=4.12.3
inscriptis~=2.2
pytz
timeago~=1.0
@@ -135,7 +135,7 @@ tzdata
pluggy ~= 1.5
# Needed for testing, cross-platform for process and system monitoring
psutil==7.0.0
psutil==7.1.0
ruff >= 0.11.2
pre_commit >= 4.2.0