Compare commits

..

7 Commits

Author SHA1 Message Date
dgtlmoon
49306246fd Add link 2024-05-20 15:10:38 +02:00
dgtlmoon
8277713c56 Merge branch 'master' into easy-override-ua 2024-05-20 15:10:04 +02:00
dgtlmoon
4602d11b79 doesnt need to be set in the fetcher anymore 2024-05-18 10:33:50 +02:00
dgtlmoon
1a0a47827e And check for the field 2024-05-17 19:20:18 +02:00
dgtlmoon
274d04fdcb Add test 2024-05-17 19:10:54 +02:00
dgtlmoon
58590633dd woops 2024-05-17 18:24:45 +02:00
dgtlmoon
92e50f58dd Ability to set default UA for either fetching types 2024-05-17 18:23:52 +02:00
18 changed files with 42 additions and 134 deletions

View File

@@ -2,7 +2,7 @@
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
__version__ = '0.45.23'
__version__ = '0.45.22'
from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError

View File

@@ -28,7 +28,7 @@ def manage_user_agent(headers, current_ua=''):
:return:
"""
# Ask it what the user agent is, if its obviously ChromeHeadless, switch it to the default
ua_in_custom_headers = headers.get('User-Agent')
ua_in_custom_headers = next((v for k, v in headers.items() if k.lower() == "user-agent"), None)
if ua_in_custom_headers:
return ua_in_custom_headers

View File

@@ -92,38 +92,15 @@ class fetcher(Fetcher):
ignoreHTTPSErrors=True
)
except websockets.exceptions.InvalidStatusCode as e:
raise BrowserConnectError(msg=f"Error while trying to connect the browser, Code {e.status_code} (check your access, whitelist IP, password etc)")
raise BrowserConnectError(msg=f"Error while trying to connect the browser, Code {e.status_code} (check your access)")
except websockets.exceptions.InvalidURI:
raise BrowserConnectError(msg=f"Error connecting to the browser, check your browser connection address (should be ws:// or wss://")
except Exception as e:
raise BrowserConnectError(msg=f"Error connecting to the browser {str(e)}")
# Better is to launch chrome with the URL as arg
# non-headless - newPage() will launch an extra tab/window, .browser should already contain 1 page/tab
# headless - ask a new page
self.page = (pages := await browser.pages) and len(pages) or await browser.newPage()
try:
from pyppeteerstealth import inject_evasions_into_page
except ImportError:
logger.debug("pyppeteerstealth module not available, skipping")
pass
else:
# I tried hooking events via self.page.on(Events.Page.DOMContentLoaded, inject_evasions_requiring_obj_to_page)
# But I could never get it to fire reliably, so we just inject it straight after
await inject_evasions_into_page(self.page)
self.page = await browser.newPage()
# This user agent is similar to what was used when tweaking the evasions in inject_evasions_into_page(..)
user_agent = None
if request_headers and request_headers.get('User-Agent'):
# Request_headers should now be CaaseInsensitiveDict
# Remove it so it's not sent again with headers after
user_agent = request_headers.pop('User-Agent').strip()
await self.page.setUserAgent(user_agent)
if not user_agent:
# Attempt to strip 'HeadlessChrome' etc
await self.page.setUserAgent(manage_user_agent(headers=request_headers, current_ua=await self.page.evaluate('navigator.userAgent')))
await self.page.setUserAgent(manage_user_agent(headers=request_headers, current_ua=await self.page.evaluate('navigator.userAgent')))
await self.page.setBypassCSP(True)
if request_headers:

View File

@@ -124,10 +124,10 @@ def _jinja2_filter_datetime(watch_obj, format="%Y-%m-%d %H:%M:%S"):
@app.template_filter('format_timestamp_timeago')
def _jinja2_filter_datetimestamp(timestamp, format="%Y-%m-%d %H:%M:%S"):
if not timestamp:
if timestamp == False:
return 'Not yet'
return timeago.format(int(timestamp), time.time())
return timeago.format(timestamp, time.time())
@app.template_filter('pagination_slice')
@@ -771,7 +771,7 @@ def changedetection_app(config=None, datastore_o=None):
jq_support=jq_support,
playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False),
settings_application=datastore.data['settings']['application'],
using_global_webdriver_wait=not default['webdriver_delay'],
using_global_webdriver_wait=default['webdriver_delay'] is None,
uuid=uuid,
visualselector_enabled=visualselector_enabled,
watch=watch

View File

@@ -333,9 +333,7 @@ class model(dict):
# Small hack so that we sleep just enough to allow 1 second between history snapshots
# this is because history.txt indexes/keys snapshots by epoch seconds and we dont want dupe keys
if self.__newest_history_key and int(timestamp) == int(self.__newest_history_key):
logger.warning(f"Timestamp {timestamp} already exists, waiting 1 seconds so we have a unique key in history.txt")
timestamp = str(int(timestamp) + 1)
time.sleep(1)
time.sleep(timestamp - self.__newest_history_key)
threshold = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024))
skip_brotli = strtobool(os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False'))

View File

@@ -1,10 +1,10 @@
from abc import abstractmethod
from changedetectionio.strtobool import strtobool
from copy import deepcopy
from loguru import logger
import hashlib
import os
import hashlib
import re
from copy import deepcopy
from changedetectionio.strtobool import strtobool
from loguru import logger
class difference_detection_processor():
@@ -21,7 +21,7 @@ class difference_detection_processor():
self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))
def call_browser(self):
from requests.structures import CaseInsensitiveDict
# Protect against file:// access
if re.search(r'^file://', self.watch.get('url', '').strip(), re.IGNORECASE):
if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
@@ -93,16 +93,14 @@ class difference_detection_processor():
self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid'))
# Tweak the base config with the per-watch ones
request_headers = CaseInsensitiveDict()
request_headers = self.watch.get('headers', [])
request_headers.update(self.datastore.get_all_base_headers())
request_headers.update(self.datastore.get_all_headers_in_textfile_for_watch(uuid=self.watch.get('uuid')))
ua = self.datastore.data['settings']['requests'].get('default_ua')
if ua and ua.get(prefer_fetch_backend):
request_headers.update({'User-Agent': ua.get(prefer_fetch_backend)})
request_headers.update(self.watch.get('headers', {}))
request_headers.update(self.datastore.get_all_base_headers())
request_headers.update(self.datastore.get_all_headers_in_textfile_for_watch(uuid=self.watch.get('uuid')))
# https://github.com/psf/requests/issues/4525
# Requests doesnt yet support brotli encoding, so don't put 'br' here, be totally sure that the user cannot
# do this by accident.

View File

@@ -40,13 +40,9 @@ if (selectElement) {
if (selectedOption) {
if (selectedOption.previousElementSibling) {
document.getElementById('btn-previous').href = "?version=" + selectedOption.previousElementSibling.value;
} else {
document.getElementById('btn-previous').remove()
}
if (selectedOption.nextElementSibling) {
document.getElementById('btn-next').href = "?version=" + selectedOption.nextElementSibling.value;
} else {
document.getElementById('btn-next').remove()
}
}

View File

@@ -178,7 +178,7 @@ class ChangeDetectionStore:
@property
def has_unviewed(self):
for uuid, watch in self.__data['watching'].items():
if watch.history_n >= 2 and watch.viewed == False:
if watch.viewed == False:
return True
return False

View File

@@ -6,9 +6,7 @@
<meta name="viewport" content="width=device-width, initial-scale=1.0" >
<meta name="description" content="Self hosted website change detection." >
<title>Change Detection{{extra_title}}</title>
{% if app_rss_token %}
<link rel="alternate" type="application/rss+xml" title="Changedetection.io » Feed{% if active_tag_uuid %}- {{active_tag.title}}{% endif %}" href="{{ url_for('rss', tag=active_tag_uuid , token=app_rss_token)}}" >
{% endif %}
<link rel="alternate" type="application/rss+xml" title="Changedetection.io » Feed{% if active_tag %}- {{active_tag}}{% endif %}" href="{{ url_for('rss', tag=active_tag , token=app_rss_token)}}" >
<link rel="stylesheet" href="{{url_for('static_content', group='styles', filename='pure-min.css')}}" >
<link rel="stylesheet" href="{{url_for('static_content', group='styles', filename='styles.css')}}?v={{ get_css_version() }}" >
{% if extra_stylesheets %}
@@ -85,8 +83,8 @@
<li class="pure-menu-item pure-form" id="search-menu-item">
<!-- We use GET here so it offers people a chance to set bookmarks etc -->
<form name="searchForm" action="" method="GET">
<input id="search-q" class="" name="q" placeholder="URL or Title {% if active_tag_uuid %}in '{{ active_tag.title }}'{% endif %}" required="" type="text" value="">
<input name="tags" type="hidden" value="{% if active_tag_uuid %}{{active_tag_uuid}}{% endif %}">
<input id="search-q" class="" name="q" placeholder="URL or Title {% if active_tag %}in '{{ active_tag }}'{% endif %}" required="" type="text" value="">
<input name="tags" type="hidden" value="{% if active_tag %}{{active_tag}}{% endif %}">
<button class="toggle-button " id="toggle-search" type="button" title="Search, or Use Alt+S Key" >
{% include "svgs/search-icon.svg" %}
</button>

View File

@@ -433,8 +433,7 @@ Unavailable") }}
<div class="pure-control-group">
{% if visualselector_enabled %}
<span class="pure-form-message-inline">
The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection &dash; after the <i>Browser Steps</i> has completed.<br>
This tool is a helper to manage filters in the "CSS/JSONPath/JQ/XPath Filters" box of the <a href="#filters-and-triggers">Filters & Triggers</a> tab.
The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection &dash; after the <i>Browser Steps</i> has completed, this tool is a helper to manage filters in the "CSS/JSONPath/JQ/XPath Filters" box of the <a href="#filters-and-triggers">Filters & Triggers</a> tab.
</span>
<div id="selector-header">

View File

@@ -66,7 +66,7 @@
</div>
<div class="tab-pane-inner" id="text">
<div class="snapshot-age">{{ current_version|format_timestamp_timeago }}</div>
<div class="snapshot-age">{{ watch.snapshot_text_ctime|format_timestamp_timeago }}</div>
<span class="ignored">Grey lines are ignored</span> <span class="triggered">Blue lines are triggers</span>
<span class="tip"><strong>Pro-tip</strong>: Highlight text to add to ignore filters</span>

View File

@@ -13,7 +13,7 @@
<div id="watch-add-wrapper-zone">
{{ render_nolabel_field(form.url, placeholder="https://...", required=true) }}
{{ render_nolabel_field(form.tags, value=active_tag.title if active_tag_uuid else '', placeholder="watch label / tag") }}
{{ render_nolabel_field(form.tags, value=active_tag.title if active_tag else '', placeholder="watch label / tag") }}
{{ render_nolabel_field(form.watch_submit_button, title="Watch this URL!" ) }}
{{ render_nolabel_field(form.edit_and_watch_submit_button, title="Edit first then Watch") }}
</div>
@@ -46,7 +46,7 @@
{% endif %}
{% if search_q %}<div id="search-result-info">Searching "<strong><i>{{search_q}}</i></strong>"</div>{% endif %}
<div>
<a href="{{url_for('index')}}" class="pure-button button-tag {{'active' if not active_tag_uuid }}">All</a>
<a href="{{url_for('index')}}" class="pure-button button-tag {{'active' if not active_tag }}">All</a>
<!-- tag list -->
{% for uuid, tag in tags %}
@@ -67,18 +67,18 @@
<tr>
{% set link_order = "desc" if sort_order == 'asc' else "asc" %}
{% set arrow_span = "" %}
<th><input style="vertical-align: middle" type="checkbox" id="check-all" > <a class="{{ 'active '+link_order if sort_attribute == 'date_created' else 'inactive' }}" href="{{url_for('index', sort='date_created', order=link_order, tag=active_tag_uuid)}}"># <span class='arrow {{link_order}}'></span></a></th>
<th><input style="vertical-align: middle" type="checkbox" id="check-all" > <a class="{{ 'active '+link_order if sort_attribute == 'date_created' else 'inactive' }}" href="{{url_for('index', sort='date_created', order=link_order, tag=active_tag)}}"># <span class='arrow {{link_order}}'></span></a></th>
<th></th>
<th><a class="{{ 'active '+link_order if sort_attribute == 'label' else 'inactive' }}" href="{{url_for('index', sort='label', order=link_order, tag=active_tag_uuid)}}">Website <span class='arrow {{link_order}}'></span></a></th>
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_checked' else 'inactive' }}" href="{{url_for('index', sort='last_checked', order=link_order, tag=active_tag_uuid)}}">Last Checked <span class='arrow {{link_order}}'></span></a></th>
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_changed' else 'inactive' }}" href="{{url_for('index', sort='last_changed', order=link_order, tag=active_tag_uuid)}}">Last Changed <span class='arrow {{link_order}}'></span></a></th>
<th><a class="{{ 'active '+link_order if sort_attribute == 'label' else 'inactive' }}" href="{{url_for('index', sort='label', order=link_order, tag=active_tag)}}">Website <span class='arrow {{link_order}}'></span></a></th>
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_checked' else 'inactive' }}" href="{{url_for('index', sort='last_checked', order=link_order, tag=active_tag)}}">Last Checked <span class='arrow {{link_order}}'></span></a></th>
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_changed' else 'inactive' }}" href="{{url_for('index', sort='last_changed', order=link_order, tag=active_tag)}}">Last Changed <span class='arrow {{link_order}}'></span></a></th>
<th></th>
</tr>
</thead>
<tbody>
{% if not watches|length %}
<tr>
<td colspan="6" style="text-wrap: wrap;">No website watches configured, please add a URL in the box above, or <a href="{{ url_for('import_page')}}" >import a list</a>.</td>
<td colspan="6">No website watches configured, please add a URL in the box above, or <a href="{{ url_for('import_page')}}" >import a list</a>.</td>
</tr>
{% endif %}
{% for watch in (watches|sort(attribute=sort_attribute, reverse=sort_order == 'asc'))|pagination_slice(skip=pagination.skip) %}
@@ -95,11 +95,11 @@
<td class="inline checkbox-uuid" ><input name="uuids" type="checkbox" value="{{ watch.uuid}} " > <span>{{ loop.index+pagination.skip }}</span></td>
<td class="inline watch-controls">
{% if not watch.paused %}
<a class="state-off" href="{{url_for('index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='pause.svg')}}" alt="Pause checks" title="Pause checks" class="icon icon-pause" ></a>
<a class="state-off" href="{{url_for('index', op='pause', uuid=watch.uuid, tag=active_tag)}}"><img src="{{url_for('static_content', group='images', filename='pause.svg')}}" alt="Pause checks" title="Pause checks" class="icon icon-pause" ></a>
{% else %}
<a class="state-on" href="{{url_for('index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='play.svg')}}" alt="UnPause checks" title="UnPause checks" class="icon icon-unpause" ></a>
<a class="state-on" href="{{url_for('index', op='pause', uuid=watch.uuid, tag=active_tag)}}"><img src="{{url_for('static_content', group='images', filename='play.svg')}}" alt="UnPause checks" title="UnPause checks" class="icon icon-unpause" ></a>
{% endif %}
<a class="link-mute state-{{'on' if watch.notification_muted else 'off'}}" href="{{url_for('index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notifications" title="Mute notifications" class="icon icon-mute" ></a>
<a class="link-mute state-{{'on' if watch.notification_muted else 'off'}}" href="{{url_for('index', op='mute', uuid=watch.uuid, tag=active_tag)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notifications" title="Mute notifications" class="icon icon-mute" ></a>
</td>
<td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}}
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"></a>
@@ -204,7 +204,7 @@
all {% if active_tag_uuid %} in "{{active_tag.title}}"{%endif%}</a>
</li>
<li>
<a href="{{ url_for('rss', tag=active_tag_uuid, token=app_rss_token)}}"><img alt="RSS Feed" id="feed-icon" src="{{url_for('static_content', group='images', filename='Generic_Feed-icon.svg')}}" height="15"></a>
<a href="{{ url_for('rss', tag=active_tag , token=app_rss_token)}}"><img alt="RSS Feed" id="feed-icon" src="{{url_for('static_content', group='images', filename='Generic_Feed-icon.svg')}}" height="15"></a>
</li>
</ul>
{{ pagination.links }}

View File

@@ -71,8 +71,6 @@ def test_check_notification_email_formats_default_HTML(client, live_server):
wait_for_all_checks(client)
set_longer_modified_response()
time.sleep(2)
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
@@ -137,7 +135,6 @@ def test_check_notification_email_formats_default_Text_override_HTML(client, liv
wait_for_all_checks(client)
set_longer_modified_response()
time.sleep(2)
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)

View File

@@ -105,7 +105,6 @@ def test_check_ldjson_price_autodetect(client, live_server):
wait_for_all_checks(client)
# Trigger a check
time.sleep(1)
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
# Offer should be gone

View File

@@ -135,9 +135,6 @@ def test_check_basic_change_detection_functionality(client, live_server):
# It should have picked up the <title>
assert b'head title' in res.data
# Be sure the last_viewed is going to be greater than the last snapshot
time.sleep(1)
# hit the mark all viewed link
res = client.get(url_for("mark_all_viewed"), follow_redirects=True)

View File

@@ -9,6 +9,9 @@ def test_check_notification_error_handling(client, live_server):
live_server_setup(live_server)
set_original_response()
# Give the endpoint time to spin up
time.sleep(1)
# Set a URL and fetch it, then set a notification URL which is going to give errors
test_url = url_for('test_endpoint', _external=True)
res = client.post(

View File

@@ -253,62 +253,6 @@ def test_method_in_request(client, live_server):
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
# Re #2408 - user-agent override test, also should handle case-insensitive header deduplication
def test_ua_global_override(client, live_server):
# live_server_setup(live_server)
test_url = url_for('test_headers', _external=True)
res = client.post(
url_for("settings_page"),
data={
"application-fetch_backend": "html_requests",
"application-minutes_between_check": 180,
"requests-default_ua-html_requests": "html-requests-user-agent"
},
follow_redirects=True
)
assert b'Settings updated' in res.data
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)
res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)
assert b"html-requests-user-agent" in res.data
# default user-agent should have shown by now
# now add a custom one in the headers
# Add some headers to a request
res = client.post(
url_for("edit_page", uuid="first"),
data={
"url": test_url,
"tags": "testtag",
"fetch_backend": 'html_requests',
# Important - also test case-insensitive
"headers": "User-AGent: agent-from-watch"},
follow_redirects=True
)
assert b"Updated watch." in res.data
wait_for_all_checks(client)
res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)
assert b"agent-from-watch" in res.data
assert b"html-requests-user-agent" not in res.data
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
def test_headers_textfile_in_request(client, live_server):
#live_server_setup(live_server)
# Add our URL to the import page
@@ -389,7 +333,7 @@ def test_headers_textfile_in_request(client, live_server):
# Not needed anymore
os.unlink('test-datastore/headers.txt')
os.unlink('test-datastore/headers-testtag.txt')
os.unlink('test-datastore/' + extract_UUID_from_client(client) + '/headers.txt')
# The service should echo back the request verb
res = client.get(
url_for("preview_page", uuid="first"),

View File

@@ -1,7 +1,7 @@
# Used by Pyppeteer
pyee
eventlet==0.35.2 # related to dnspython fixes
eventlet==0.33.3 # related to dnspython fixes
feedgen~=0.9
flask-compress
# 0.6.3 included compatibility fix for werkzeug 3.x (2.x had deprecation of url handlers)
@@ -73,10 +73,12 @@ openpyxl
jq~=1.3; python_version >= "3.8" and sys_platform == "darwin"
jq~=1.3; python_version >= "3.8" and sys_platform == "linux"
# Any current modern version, required so far for screenshot PNG->JPEG conversion but will be used more in the future
pillow
# playwright is installed at Dockerfile build time because it's not available on all platforms
# experimental release
pyppeteer-ng==2.0.0rc5
pyppeteerstealth>=0.0.4
# Include pytest, so if theres a support issue we can ask them to run these tests on their setup
pytest ~=7.2