mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2025-11-02 07:37:25 +00:00
Merge branch 'master' into history-preview-ignore-text-highlighting
Some checks failed
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/amd64 (alpine) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/arm64 (alpine) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/amd64 (main) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/arm/v7 (main) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/arm/v8 (main) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/arm64 (main) (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built 📦 package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
Some checks failed
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/amd64 (alpine) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/arm64 (alpine) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/amd64 (main) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/arm/v7 (main) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/arm/v8 (main) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/arm64 (main) (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built 📦 package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
This commit is contained in:
6
.github/workflows/codeql-analysis.yml
vendored
6
.github/workflows/codeql-analysis.yml
vendored
@@ -34,7 +34,7 @@ jobs:
|
||||
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v3
|
||||
uses: github/codeql-action/init@v4
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
# If you wish to specify custom queries, you can do so here or in a config file.
|
||||
@@ -45,7 +45,7 @@ jobs:
|
||||
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
|
||||
# If this step fails, then you should remove it and run the build manually (see below)
|
||||
- name: Autobuild
|
||||
uses: github/codeql-action/autobuild@v3
|
||||
uses: github/codeql-action/autobuild@v4
|
||||
|
||||
# ℹ️ Command-line programs to run using the OS shell.
|
||||
# 📚 https://git.io/JvXDl
|
||||
@@ -59,4 +59,4 @@ jobs:
|
||||
# make release
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v3
|
||||
uses: github/codeql-action/analyze@v4
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
|
||||
|
||||
__version__ = '0.50.17'
|
||||
__version__ = '0.50.18'
|
||||
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from json.decoder import JSONDecodeError
|
||||
|
||||
@@ -20,8 +20,6 @@ Used by: processors/text_json_diff/processor.py and other content processors
|
||||
RSS_XML_CONTENT_TYPES = [
|
||||
"application/rss+xml",
|
||||
"application/rdf+xml",
|
||||
"text/xml",
|
||||
"application/xml",
|
||||
"application/atom+xml",
|
||||
"text/rss+xml", # rare, non-standard
|
||||
"application/x-rss+xml", # legacy (older feed software)
|
||||
@@ -37,11 +35,6 @@ JSON_CONTENT_TYPES = [
|
||||
"application/vnd.api+json",
|
||||
]
|
||||
|
||||
# CSV Content-types
|
||||
CSV_CONTENT_TYPES = [
|
||||
"text/csv",
|
||||
"application/csv",
|
||||
]
|
||||
|
||||
# Generic XML Content-types (non-RSS/Atom)
|
||||
XML_CONTENT_TYPES = [
|
||||
@@ -49,14 +42,6 @@ XML_CONTENT_TYPES = [
|
||||
"application/xml",
|
||||
]
|
||||
|
||||
# YAML Content-types
|
||||
YAML_CONTENT_TYPES = [
|
||||
"text/yaml",
|
||||
"text/x-yaml",
|
||||
"application/yaml",
|
||||
"application/x-yaml",
|
||||
]
|
||||
|
||||
HTML_PATTERNS = ['<!doctype html', '<html', '<head', '<body', '<script', '<iframe', '<div']
|
||||
|
||||
import re
|
||||
@@ -104,18 +89,16 @@ class guess_stream_type():
|
||||
has_html_patterns = any(p in test_content_normalized for p in HTML_PATTERNS)
|
||||
|
||||
# Always trust headers first
|
||||
if any(s in http_content_header for s in RSS_XML_CONTENT_TYPES) or any(s in magic_content_header for s in RSS_XML_CONTENT_TYPES):
|
||||
if 'text/plain' in http_content_header:
|
||||
self.is_plaintext = True
|
||||
if any(s in http_content_header for s in RSS_XML_CONTENT_TYPES):
|
||||
self.is_rss = True
|
||||
elif any(s in http_content_header for s in JSON_CONTENT_TYPES) or any(s in magic_content_header for s in JSON_CONTENT_TYPES):
|
||||
elif any(s in http_content_header for s in JSON_CONTENT_TYPES):
|
||||
self.is_json = True
|
||||
elif any(s in http_content_header for s in CSV_CONTENT_TYPES) or any(s in magic_content_header for s in CSV_CONTENT_TYPES):
|
||||
self.is_csv = True
|
||||
elif any(s in http_content_header for s in XML_CONTENT_TYPES) or any(s in magic_content_header for s in XML_CONTENT_TYPES):
|
||||
elif any(s in http_content_header for s in XML_CONTENT_TYPES):
|
||||
# Only mark as generic XML if not already detected as RSS
|
||||
if not self.is_rss:
|
||||
self.is_xml = True
|
||||
elif any(s in http_content_header for s in YAML_CONTENT_TYPES) or any(s in magic_content_header for s in YAML_CONTENT_TYPES):
|
||||
self.is_yaml = True
|
||||
elif 'pdf' in magic_content_header:
|
||||
self.is_pdf = True
|
||||
###
|
||||
@@ -125,13 +108,18 @@ class guess_stream_type():
|
||||
elif magic_result == 'text/plain':
|
||||
self.is_plaintext = True
|
||||
logger.debug(f"Trusting magic's text/plain result (no HTML patterns detected)")
|
||||
elif '<rss' in test_content_normalized or '<feed' in test_content_normalized:
|
||||
elif any(s in magic_content_header for s in JSON_CONTENT_TYPES):
|
||||
self.is_json = True
|
||||
# magic will call a rss document 'xml'
|
||||
elif '<rss' in test_content_normalized or '<feed' in test_content_normalized or any(s in magic_content_header for s in RSS_XML_CONTENT_TYPES):
|
||||
self.is_rss = True
|
||||
elif test_content_normalized.startswith('<?xml'):
|
||||
elif test_content_normalized.startswith('<?xml') or any(s in magic_content_header for s in XML_CONTENT_TYPES):
|
||||
# Generic XML that's not RSS/Atom (RSS/Atom checked above)
|
||||
self.is_xml = True
|
||||
elif '%pdf-1' in test_content:
|
||||
self.is_pdf = True
|
||||
elif http_content_header.startswith('text/'):
|
||||
self.is_plaintext = True
|
||||
# Only trust magic for 'text' if no other patterns matched
|
||||
elif 'text' in magic_content_header:
|
||||
self.is_plaintext = True
|
||||
|
||||
@@ -295,3 +295,36 @@ got it\r\n
|
||||
|
||||
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
|
||||
# Server says its plaintext, we should always treat it as plaintext
|
||||
def test_plaintext_even_if_xml_content(client, live_server, measure_memory_usage):
|
||||
|
||||
with open("test-datastore/endpoint-content.txt", "w") as f:
|
||||
f.write("""<?xml version="1.0" encoding="utf-8"?>
|
||||
<resources xmlns:tools="http://schemas.android.com/tools">
|
||||
<!--Activity and fragment titles-->
|
||||
<string name="feed_update_receiver_name">Abonnementen bijwerken</string>
|
||||
</resources>
|
||||
""")
|
||||
|
||||
test_url = url_for('test_endpoint', content_type="text/plain", _external=True)
|
||||
|
||||
# Add our URL to the import page
|
||||
res = client.post(
|
||||
url_for("imports.import_page"),
|
||||
data={"urls": test_url},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert b"1 Imported" in res.data
|
||||
|
||||
wait_for_all_checks(client)
|
||||
|
||||
res = client.get(
|
||||
url_for("ui.ui_views.preview_page", uuid="first"),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert b'<string name="feed_update_receiver_name"' in res.data
|
||||
|
||||
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
|
||||
|
||||
@@ -111,7 +111,7 @@ def test_basic_cdata_rss_markup(client, live_server, measure_memory_usage):
|
||||
|
||||
set_original_cdata_xml()
|
||||
|
||||
test_url = url_for('test_endpoint', content_type="application/xml", _external=True)
|
||||
test_url = url_for('test_endpoint', content_type="application/atom+xml; charset=UTF-8", _external=True)
|
||||
|
||||
# Add our URL to the import page
|
||||
res = client.post(
|
||||
@@ -139,7 +139,7 @@ def test_rss_xpath_filtering(client, live_server, measure_memory_usage):
|
||||
|
||||
set_original_cdata_xml()
|
||||
|
||||
test_url = url_for('test_endpoint', content_type="application/xml", _external=True)
|
||||
test_url = url_for('test_endpoint', content_type="application/atom+xml; charset=UTF-8", _external=True)
|
||||
|
||||
res = client.post(
|
||||
url_for("ui.ui_views.form_quick_watch_add"),
|
||||
|
||||
@@ -12,7 +12,7 @@ flask_wtf~=1.2
|
||||
flask~=2.3
|
||||
flask-socketio~=5.5.1
|
||||
python-socketio~=5.13.0
|
||||
python-engineio~=4.12.0
|
||||
python-engineio~=4.12.3
|
||||
inscriptis~=2.2
|
||||
pytz
|
||||
timeago~=1.0
|
||||
@@ -137,7 +137,7 @@ tzdata
|
||||
pluggy ~= 1.5
|
||||
|
||||
# Needed for testing, cross-platform for process and system monitoring
|
||||
psutil==7.0.0
|
||||
psutil==7.1.0
|
||||
|
||||
ruff >= 0.11.2
|
||||
pre_commit >= 4.2.0
|
||||
|
||||
Reference in New Issue
Block a user