Compare commits

...

6 Commits

Author SHA1 Message Date
dgtlmoon
bbe72a11bd Adding setting 2024-06-13 11:02:41 +02:00
dgtlmoon
d78d535dcd fixing variable fetch 2024-06-13 10:54:25 +02:00
dgtlmoon
c6ee6687b5 Fetching/Requests - Fixing user agent header overrides per-watch of global settings (#2409) 2024-06-13 10:50:46 +02:00
dgtlmoon
de48892243 Code - improving unique key fix for history database handler (#2402)
* improving unique key fix

* also bump timestamp along 1 sec
2024-06-06 22:59:04 +02:00
dgtlmoon
6aded50aca UI - 'Mark all viewed' button should not show when all viewed (#2399) 2024-06-05 11:46:04 +02:00
dgtlmoon
b8e279a025 Fixing build test - Adding small delay (#2397) 2024-06-04 13:56:35 +02:00
14 changed files with 95 additions and 25 deletions

View File

@@ -28,7 +28,7 @@ def manage_user_agent(headers, current_ua=''):
:return:
"""
# Ask it what the user agent is, if its obviously ChromeHeadless, switch it to the default
ua_in_custom_headers = next((v for k, v in headers.items() if k.lower() == "user-agent"), None)
ua_in_custom_headers = headers.get('User-Agent')
if ua_in_custom_headers:
return ua_in_custom_headers

View File

@@ -115,12 +115,11 @@ class fetcher(Fetcher):
# This user agent is similar to what was used when tweaking the evasions in inject_evasions_into_page(..)
user_agent = None
if request_headers:
user_agent = next((value for key, value in request_headers.items() if key.lower().strip() == 'user-agent'), None)
if user_agent:
await self.page.setUserAgent(user_agent)
# Remove it so it's not sent again with headers after
[request_headers.pop(key) for key in list(request_headers) if key.lower().strip() == 'user-agent'.lower().strip()]
if request_headers and request_headers.get('User-Agent'):
# Request_headers should now be CaaseInsensitiveDict
# Remove it so it's not sent again with headers after
user_agent = request_headers.pop('User-Agent').strip()
await self.page.setUserAgent(user_agent)
if not user_agent:
# Attempt to strip 'HeadlessChrome' etc

View File

@@ -339,7 +339,7 @@ def changedetection_app(config=None, datastore_o=None):
# @todo needs a .itemsWithTag() or something - then we can use that in Jinaj2 and throw this away
for uuid, watch in datastore.data['watching'].items():
# @todo tag notification_muted skip also (improve Watch model)
if watch.get('notification_muted'):
if datastore.data['settings']['application'].get('rss_hide_muted_watches') and watch.get('notification_muted'):
continue
if limit_tag and not limit_tag in watch['tags']:
continue
@@ -472,7 +472,7 @@ def changedetection_app(config=None, datastore_o=None):
# Don't link to hosting when we're on the hosting environment
active_tag=active_tag,
active_tag_uuid=active_tag_uuid,
app_rss_token=datastore.data['settings']['application']['rss_access_token'],
app_rss_token=datastore.data['settings']['application'].get('rss_access_token'),
datastore=datastore,
errored_count=errored_count,
form=form,

View File

@@ -572,6 +572,8 @@ class globalSettingsApplicationForm(commonSettingsForm):
removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
shared_diff_access = BooleanField('Allow access to view diff page when password is enabled', default=False, validators=[validators.Optional()])
rss_hide_muted_watches = BooleanField('Hide muted watches from RSS feed', default=True,
validators=[validators.Optional()])
filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification',
render_kw={"style": "width: 5em;"},
validators=[validators.NumberRange(min=0,

View File

@@ -46,6 +46,8 @@ class model(dict):
'pager_size': 50,
'password': False,
'render_anchor_tag_content': False,
'rss_access_token': None,
'rss_hide_muted_watches': True,
'schema_version' : 0,
'shared_diff_access': False,
'webdriver_delay': None , # Extra delay in seconds before extracting text

View File

@@ -333,7 +333,9 @@ class model(dict):
# Small hack so that we sleep just enough to allow 1 second between history snapshots
# this is because history.txt indexes/keys snapshots by epoch seconds and we dont want dupe keys
if self.__newest_history_key and int(timestamp) == int(self.__newest_history_key):
time.sleep(timestamp - self.__newest_history_key)
logger.warning(f"Timestamp {timestamp} already exists, waiting 1 seconds so we have a unique key in history.txt")
timestamp = str(int(timestamp) + 1)
time.sleep(1)
threshold = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024))
skip_brotli = strtobool(os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False'))

View File

@@ -1,10 +1,10 @@
from abc import abstractmethod
import os
import hashlib
import re
from copy import deepcopy
from changedetectionio.strtobool import strtobool
from copy import deepcopy
from loguru import logger
import hashlib
import os
import re
class difference_detection_processor():
@@ -21,7 +21,7 @@ class difference_detection_processor():
self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))
def call_browser(self):
from requests.structures import CaseInsensitiveDict
# Protect against file:// access
if re.search(r'^file://', self.watch.get('url', '').strip(), re.IGNORECASE):
if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
@@ -93,14 +93,16 @@ class difference_detection_processor():
self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid'))
# Tweak the base config with the per-watch ones
request_headers = self.watch.get('headers', [])
request_headers.update(self.datastore.get_all_base_headers())
request_headers.update(self.datastore.get_all_headers_in_textfile_for_watch(uuid=self.watch.get('uuid')))
request_headers = CaseInsensitiveDict()
ua = self.datastore.data['settings']['requests'].get('default_ua')
if ua and ua.get(prefer_fetch_backend):
request_headers.update({'User-Agent': ua.get(prefer_fetch_backend)})
request_headers.update(self.watch.get('headers', {}))
request_headers.update(self.datastore.get_all_base_headers())
request_headers.update(self.datastore.get_all_headers_in_textfile_for_watch(uuid=self.watch.get('uuid')))
# https://github.com/psf/requests/issues/4525
# Requests doesnt yet support brotli encoding, so don't put 'br' here, be totally sure that the user cannot
# do this by accident.

View File

@@ -124,12 +124,12 @@ class ChangeDetectionStore:
self.__data['app_guid'] = str(uuid_builder.uuid4())
# Generate the URL access token for RSS feeds
if not 'rss_access_token' in self.__data['settings']['application']:
if not self.__data['settings']['application'].get('rss_access_token'):
secret = secrets.token_hex(16)
self.__data['settings']['application']['rss_access_token'] = secret
# Generate the API access token
if not 'api_access_token' in self.__data['settings']['application']:
if not self.__data['settings']['application'].get('api_access_token'):
secret = secrets.token_hex(16)
self.__data['settings']['application']['api_access_token'] = secret
@@ -178,7 +178,7 @@ class ChangeDetectionStore:
@property
def has_unviewed(self):
for uuid, watch in self.__data['watching'].items():
if watch.viewed == False:
if watch.history_n >= 2 and watch.viewed == False:
return True
return False

View File

@@ -62,6 +62,9 @@
<span class="pure-form-message-inline">Allow access to view watch diff page when password is enabled (Good for sharing the diff page)
</span>
</div>
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.rss_hide_muted_watches) }}
</div>
<div class="pure-control-group">
{{ render_field(form.application.form.pager_size) }}
<span class="pure-form-message-inline">Number of items per page in the watch overview list, 0 to disable.</span>

View File

@@ -71,6 +71,8 @@ def test_check_notification_email_formats_default_HTML(client, live_server):
wait_for_all_checks(client)
set_longer_modified_response()
time.sleep(2)
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
@@ -135,6 +137,7 @@ def test_check_notification_email_formats_default_Text_override_HTML(client, liv
wait_for_all_checks(client)
set_longer_modified_response()
time.sleep(2)
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)

View File

@@ -105,6 +105,7 @@ def test_check_ldjson_price_autodetect(client, live_server):
wait_for_all_checks(client)
# Trigger a check
time.sleep(1)
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
# Offer should be gone

View File

@@ -135,6 +135,9 @@ def test_check_basic_change_detection_functionality(client, live_server):
# It should have picked up the <title>
assert b'head title' in res.data
# Be sure the last_viewed is going to be greater than the last snapshot
time.sleep(1)
# hit the mark all viewed link
res = client.get(url_for("mark_all_viewed"), follow_redirects=True)

View File

@@ -9,9 +9,6 @@ def test_check_notification_error_handling(client, live_server):
live_server_setup(live_server)
set_original_response()
# Give the endpoint time to spin up
time.sleep(1)
# Set a URL and fetch it, then set a notification URL which is going to give errors
test_url = url_for('test_endpoint', _external=True)
res = client.post(

View File

@@ -253,6 +253,62 @@ def test_method_in_request(client, live_server):
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
# Re #2408 - user-agent override test, also should handle case-insensitive header deduplication
def test_ua_global_override(client, live_server):
# live_server_setup(live_server)
test_url = url_for('test_headers', _external=True)
res = client.post(
url_for("settings_page"),
data={
"application-fetch_backend": "html_requests",
"application-minutes_between_check": 180,
"requests-default_ua-html_requests": "html-requests-user-agent"
},
follow_redirects=True
)
assert b'Settings updated' in res.data
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)
res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)
assert b"html-requests-user-agent" in res.data
# default user-agent should have shown by now
# now add a custom one in the headers
# Add some headers to a request
res = client.post(
url_for("edit_page", uuid="first"),
data={
"url": test_url,
"tags": "testtag",
"fetch_backend": 'html_requests',
# Important - also test case-insensitive
"headers": "User-AGent: agent-from-watch"},
follow_redirects=True
)
assert b"Updated watch." in res.data
wait_for_all_checks(client)
res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)
assert b"agent-from-watch" in res.data
assert b"html-requests-user-agent" not in res.data
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
def test_headers_textfile_in_request(client, live_server):
#live_server_setup(live_server)
# Add our URL to the import page
@@ -333,7 +389,7 @@ def test_headers_textfile_in_request(client, live_server):
# Not needed anymore
os.unlink('test-datastore/headers.txt')
os.unlink('test-datastore/headers-testtag.txt')
os.unlink('test-datastore/' + extract_UUID_from_client(client) + '/headers.txt')
# The service should echo back the request verb
res = client.get(
url_for("preview_page", uuid="first"),