Compare commits

..

3 Commits

Author SHA1 Message Date
dgtlmoon
78f3f2b26a Merge branch 'master' into selenium-proxy-fix 2025-05-02 14:05:45 +02:00
dgtlmoon
535ee97ef7 Selenium proxy fixes 2025-05-02 10:54:01 +02:00
dgtlmoon
b2923b8c3a Fixes to ensure proxy errors are handled correctly 2025-05-02 10:21:27 +02:00
10 changed files with 232 additions and 286 deletions

View File

@@ -2,7 +2,7 @@
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
__version__ = '0.49.16'
__version__ = '0.49.15'
from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError

View File

@@ -1,128 +0,0 @@
{% macro render_watchlist_watch_as_tablerow(pagination, loop, datastore, watch) %}
{% set is_unviewed = watch.newest_history_key| int > watch.last_viewed and watch.history_n>=2 %}
{% set checking_now = is_checking_now(watch) %}
<tr id="{{ watch.uuid }}"
class="{{ loop.cycle('pure-table-odd', 'pure-table-even') }} processor-{{ watch['processor'] }}
{% if watch.last_error is defined and watch.last_error != False %}error{% endif %}
{% if watch.last_notification_error is defined and watch.last_notification_error != False %}error{% endif %}
{% if watch.paused is defined and watch.paused != False %}paused{% endif %}
{% if is_unviewed %}unviewed{% endif %}
{% if watch.has_restock_info %} has-restock-info {% if watch['restock']['in_stock'] %}in-stock{% else %}not-in-stock{% endif %} {% else %}no-restock-info{% endif %}
{% if watch.uuid in queued_uuids %}queued{% endif %}
{% if checking_now %}checking-now{% endif %}
">
<td class="inline checkbox-uuid" ><input name="uuids" type="checkbox" value="{{ watch.uuid}} " > <span>{{ loop.index+pagination.skip }}</span></td>
<td class="inline watch-controls">
{% if not watch.paused %}
<a class="state-off" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='pause.svg')}}" alt="Pause checks" title="Pause checks" class="icon icon-pause" ></a>
{% else %}
<a class="state-on" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='play.svg')}}" alt="UnPause checks" title="UnPause checks" class="icon icon-unpause" ></a>
{% endif %}
{% set mute_label = 'UnMute notification' if watch.notification_muted else 'Mute notification' %}
<a class="link-mute state-{{'on' if watch.notification_muted else 'off'}}" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="{{ mute_label }}" title="{{ mute_label }}" class="icon icon-mute" ></a>
</td>
<td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}}
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"></a>
<a class="link-spread" href="{{url_for('ui.form_share_put_watch', uuid=watch.uuid)}}"><img src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" ></a>
{% if watch.get_fetch_backend == "html_webdriver"
or ( watch.get_fetch_backend == "system" and system_default_fetcher == 'html_webdriver' )
or "extra_browser_" in watch.get_fetch_backend
%}
<img class="status-icon" src="{{url_for('static_content', group='images', filename='google-chrome-icon.png')}}" alt="Using a Chrome browser" title="Using a Chrome browser" >
{% endif %}
{%if watch.is_pdf %}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" title="Converting PDF to text" >{% endif %}
{% if watch.has_browser_steps %}<img class="status-icon status-browsersteps" src="{{url_for('static_content', group='images', filename='steps.svg')}}" title="Browser Steps is enabled" >{% endif %}
{% if watch.last_error is defined and watch.last_error != False %}
<div class="fetch-error">{{ watch.last_error }}
{% if '403' in watch.last_error %}
{% if has_proxies %}
<a href="{{ url_for('settings.settings_page', uuid=watch.uuid) }}#proxies">Try other proxies/location</a>&nbsp;
{% endif %}
<a href="{{ url_for('settings.settings_page', uuid=watch.uuid) }}#proxies">Try adding external proxies/locations</a>
{% endif %}
{% if 'empty result or contain only an image' in watch.last_error %}
<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Detecting-changes-in-images">more help here</a>.
{% endif %}
</div>
{% endif %}
{% if watch.last_notification_error is defined and watch.last_notification_error != False %}
<div class="fetch-error notification-error"><a href="{{url_for('settings.notification_logs')}}">{{ watch.last_notification_error }}</a></div>
{% endif %}
{% if watch['processor'] == 'text_json_diff' %}
{% if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] %}
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
{% endif %}
{% endif %}
{% if watch['processor'] == 'restock_diff' %}
<span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}" class="status-icon price-follow-tag-icon" > Price</span>
{% endif %}
{% for watch_tag_uuid, watch_tag in datastore.get_all_tags_for_watch(watch['uuid']).items() %}
<span class="watch-tag-list">{{ watch_tag.title }}</span>
{% endfor %}
</td>
<!-- @todo make it so any watch handler obj can expose this --->
{% if any_has_restock_price_processor %}
<td class="restock-and-price">
{% if watch['processor'] == 'restock_diff' %}
{% if watch.has_restock_info %}
<span class="restock-label {{'in-stock' if watch['restock']['in_stock'] else 'not-in-stock' }}" title="Detecting restock and price">
<!-- maybe some object watch['processor'][restock_diff] or.. -->
{% if watch['restock']['in_stock'] %} In stock {% else %} Not in stock {% endif %}
</span>
{% endif %}
{% if watch.get('restock') and watch['restock']['price'] != None %}
{% if watch['restock']['price'] != None %}
<span class="restock-label price" title="Price">
{{ watch['restock']['price']|format_number_locale }} {{ watch['restock']['currency'] }}
</span>
{% endif %}
{% elif not watch.has_restock_info %}
<span class="restock-label error">No information</span>
{% endif %}
{% endif %}
</td>
{% endif %}
{#last_checked becomes fetch-start-time#}
<td class="last-checked" data-timestamp="{{ watch.last_checked }}" {% if checking_now %} data-fetchduration={{ watch.fetch_time }} data-eta_complete="{{ watch.last_checked+watch.fetch_time }}" {% endif %} >
{% if checking_now %}
<span class="spinner"></span><span> Checking now</span>
{% else %}
{{watch|format_last_checked_time|safe}}</td>
{% endif %}
<td class="last-changed" data-timestamp="{{ watch.last_changed }}">{% if watch.history_n >=2 and watch.last_changed >0 %}
{{watch.last_changed|format_timestamp_timeago}}
{% else %}
Not yet
{% endif %}
</td>
<td>
<a {% if watch.uuid in queued_uuids %}disabled="true"{% endif %} href="{{ url_for('ui.form_watch_checknow', uuid=watch.uuid, tag=request.args.get('tag')) }}"
class="recheck pure-button pure-button-primary">{% if watch.uuid in queued_uuids %}Queued{% else %}Recheck{% endif %}</a>
<a href="{{ url_for('ui.ui_edit.edit_page', uuid=watch.uuid, tag=active_tag_uuid)}}#general" class="pure-button pure-button-primary">Edit</a>
{% if watch.history_n >= 2 %}
{% set open_diff_in_new_tab = datastore.data['settings']['application']['ui'].get('open_diff_in_new_tab') %}
{% set target_attr = ' target="' ~ watch.uuid ~ '"' if open_diff_in_new_tab else '' %}
{% if is_unviewed %}
<a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid, from_version=watch.get_from_version_based_on_last_viewed) }}" {{target_attr}} class="pure-button pure-button-primary diff-link">History</a>
{% else %}
<a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary diff-link">History</a>
{% endif %}
{% else %}
{% if watch.history_n == 1 or (watch.history_n ==0 and watch.error_text_ctime )%}
<a href="{{ url_for('ui.ui_views.preview_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary">Preview</a>
{% endif %}
{% endif %}
</td>
{% endmacro %}

View File

@@ -1,7 +1,6 @@
{% extends 'base.html' %}
{% block content %}
{% from '_helpers.html' import render_simple_field, render_field, render_nolabel_field, sort_by_title %}
{% from '_macro.html' import render_watchlist_watch_as_tablerow %}
<script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
<script src="{{url_for('static_content', group='js', filename='watch-overview.js')}}" defer></script>
<script>let nowtimeserver={{ now_time_server }};</script>
@@ -101,7 +100,130 @@
{% endif %}
{% for watch in (watches|sort(attribute=sort_attribute, reverse=sort_order == 'asc'))|pagination_slice(skip=pagination.skip) %}
{{ render_watchlist_watch_as_tablerow(pagination, loop, datastore, watch) }}
{% set is_unviewed = watch.newest_history_key| int > watch.last_viewed and watch.history_n>=2 %}
{% set checking_now = is_checking_now(watch) %}
<tr id="{{ watch.uuid }}"
class="{{ loop.cycle('pure-table-odd', 'pure-table-even') }} processor-{{ watch['processor'] }}
{% if watch.last_error is defined and watch.last_error != False %}error{% endif %}
{% if watch.last_notification_error is defined and watch.last_notification_error != False %}error{% endif %}
{% if watch.paused is defined and watch.paused != False %}paused{% endif %}
{% if is_unviewed %}unviewed{% endif %}
{% if watch.has_restock_info %} has-restock-info {% if watch['restock']['in_stock'] %}in-stock{% else %}not-in-stock{% endif %} {% else %}no-restock-info{% endif %}
{% if watch.uuid in queued_uuids %}queued{% endif %}
{% if checking_now %}checking-now{% endif %}
">
<td class="inline checkbox-uuid" ><input name="uuids" type="checkbox" value="{{ watch.uuid}} " > <span>{{ loop.index+pagination.skip }}</span></td>
<td class="inline watch-controls">
{% if not watch.paused %}
<a class="state-off" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='pause.svg')}}" alt="Pause checks" title="Pause checks" class="icon icon-pause" ></a>
{% else %}
<a class="state-on" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='play.svg')}}" alt="UnPause checks" title="UnPause checks" class="icon icon-unpause" ></a>
{% endif %}
{% set mute_label = 'UnMute notification' if watch.notification_muted else 'Mute notification' %}
<a class="link-mute state-{{'on' if watch.notification_muted else 'off'}}" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="{{ mute_label }}" title="{{ mute_label }}" class="icon icon-mute" ></a>
</td>
<td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}}
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"></a>
<a class="link-spread" href="{{url_for('ui.form_share_put_watch', uuid=watch.uuid)}}"><img src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" ></a>
{% if watch.get_fetch_backend == "html_webdriver"
or ( watch.get_fetch_backend == "system" and system_default_fetcher == 'html_webdriver' )
or "extra_browser_" in watch.get_fetch_backend
%}
<img class="status-icon" src="{{url_for('static_content', group='images', filename='google-chrome-icon.png')}}" alt="Using a Chrome browser" title="Using a Chrome browser" >
{% endif %}
{%if watch.is_pdf %}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" title="Converting PDF to text" >{% endif %}
{% if watch.has_browser_steps %}<img class="status-icon status-browsersteps" src="{{url_for('static_content', group='images', filename='steps.svg')}}" title="Browser Steps is enabled" >{% endif %}
{% if watch.last_error is defined and watch.last_error != False %}
<div class="fetch-error">{{ watch.last_error }}
{% if '403' in watch.last_error %}
{% if has_proxies %}
<a href="{{ url_for('settings.settings_page', uuid=watch.uuid) }}#proxies">Try other proxies/location</a>&nbsp;
{% endif %}
<a href="{{ url_for('settings.settings_page', uuid=watch.uuid) }}#proxies">Try adding external proxies/locations</a>
{% endif %}
{% if 'empty result or contain only an image' in watch.last_error %}
<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Detecting-changes-in-images">more help here</a>.
{% endif %}
</div>
{% endif %}
{% if watch.last_notification_error is defined and watch.last_notification_error != False %}
<div class="fetch-error notification-error"><a href="{{url_for('settings.notification_logs')}}">{{ watch.last_notification_error }}</a></div>
{% endif %}
{% if watch['processor'] == 'text_json_diff' %}
{% if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] %}
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
{% endif %}
{% endif %}
{% if watch['processor'] == 'restock_diff' %}
<span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}" class="status-icon price-follow-tag-icon" > Price</span>
{% endif %}
{% for watch_tag_uuid, watch_tag in datastore.get_all_tags_for_watch(watch['uuid']).items() %}
<span class="watch-tag-list">{{ watch_tag.title }}</span>
{% endfor %}
</td>
<!-- @todo make it so any watch handler obj can expose this --->
{% if any_has_restock_price_processor %}
<td class="restock-and-price">
{% if watch['processor'] == 'restock_diff' %}
{% if watch.has_restock_info %}
<span class="restock-label {{'in-stock' if watch['restock']['in_stock'] else 'not-in-stock' }}" title="Detecting restock and price">
<!-- maybe some object watch['processor'][restock_diff] or.. -->
{% if watch['restock']['in_stock'] %} In stock {% else %} Not in stock {% endif %}
</span>
{% endif %}
{% if watch.get('restock') and watch['restock']['price'] != None %}
{% if watch['restock']['price'] != None %}
<span class="restock-label price" title="Price">
{{ watch['restock']['price']|format_number_locale }} {{ watch['restock']['currency'] }}
</span>
{% endif %}
{% elif not watch.has_restock_info %}
<span class="restock-label error">No information</span>
{% endif %}
{% endif %}
</td>
{% endif %}
{#last_checked becomes fetch-start-time#}
<td class="last-checked" data-timestamp="{{ watch.last_checked }}" {% if checking_now %} data-fetchduration={{ watch.fetch_time }} data-eta_complete="{{ watch.last_checked+watch.fetch_time }}" {% endif %} >
{% if checking_now %}
<span class="spinner"></span><span> Checking now</span>
{% else %}
{{watch|format_last_checked_time|safe}}</td>
{% endif %}
<td class="last-changed" data-timestamp="{{ watch.last_changed }}">{% if watch.history_n >=2 and watch.last_changed >0 %}
{{watch.last_changed|format_timestamp_timeago}}
{% else %}
Not yet
{% endif %}
</td>
<td>
<a {% if watch.uuid in queued_uuids %}disabled="true"{% endif %} href="{{ url_for('ui.form_watch_checknow', uuid=watch.uuid, tag=request.args.get('tag')) }}"
class="recheck pure-button pure-button-primary">{% if watch.uuid in queued_uuids %}Queued{% else %}Recheck{% endif %}</a>
<a href="{{ url_for('ui.ui_edit.edit_page', uuid=watch.uuid, tag=active_tag_uuid)}}#general" class="pure-button pure-button-primary">Edit</a>
{% if watch.history_n >= 2 %}
{% set open_diff_in_new_tab = datastore.data['settings']['application']['ui'].get('open_diff_in_new_tab') %}
{% set target_attr = ' target="' ~ watch.uuid ~ '"' if open_diff_in_new_tab else '' %}
{% if is_unviewed %}
<a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid, from_version=watch.get_from_version_based_on_last_viewed) }}" {{target_attr}} class="pure-button pure-button-primary diff-link">History</a>
{% else %}
<a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary diff-link">History</a>
{% endif %}
{% else %}
{% if watch.history_n == 1 or (watch.history_n ==0 and watch.error_text_ctime )%}
<a href="{{ url_for('ui.ui_views.preview_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary">Preview</a>
{% endif %}
{% endif %}
</td>
</tr>
{% endfor %}
</tbody>

View File

@@ -10,13 +10,16 @@ class fetcher(Fetcher):
else:
fetcher_description = "WebDriver Chrome/Javascript"
# Configs for Proxy setup
# In the ENV vars, is prefixed with "webdriver_", so it is for example "webdriver_sslProxy"
selenium_proxy_settings_mappings = ['proxyType', 'ftpProxy', 'httpProxy', 'noProxy',
'proxyAutoconfigUrl', 'sslProxy', 'autodetect',
'socksProxy', 'socksVersion', 'socksUsername', 'socksPassword']
proxy = None
proxy_url = None
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
super().__init__()
from urllib.parse import urlparse
from selenium.webdriver.common.proxy import Proxy
from selenium.webdriver.common.proxy import Proxy as SeleniumProxy
# .strip('"') is going to save someone a lot of time when they accidentally wrap the env value in quotes
if not custom_browser_connection_url:
@@ -25,27 +28,25 @@ class fetcher(Fetcher):
self.browser_connection_is_custom = True
self.browser_connection_url = custom_browser_connection_url
# If any proxy settings are enabled, then we should setup the proxy object
proxy_args = {}
for k in self.selenium_proxy_settings_mappings:
v = os.getenv('webdriver_' + k, False)
if v:
proxy_args[k] = v.strip('"')
##### PROXY SETUP #####
# Map back standard HTTP_ and HTTPS_PROXY to webDriver httpProxy/sslProxy
if not proxy_args.get('webdriver_httpProxy') and self.system_http_proxy:
proxy_args['httpProxy'] = self.system_http_proxy
if not proxy_args.get('webdriver_sslProxy') and self.system_https_proxy:
proxy_args['httpsProxy'] = self.system_https_proxy
proxy_sources = [
self.system_http_proxy,
self.system_https_proxy,
os.getenv('webdriver_proxySocks'),
os.getenv('webdriver_socksProxy'),
os.getenv('webdriver_proxyHttp'),
os.getenv('webdriver_httpProxy'),
os.getenv('webdriver_proxyHttps'),
os.getenv('webdriver_httpsProxy'),
os.getenv('webdriver_sslProxy'),
proxy_override, # last one should override
]
# The built-in selenium proxy handling is super unreliable!!! So we just grab whichever proxy setting we can find and throw it in --proxy-server=
for k in filter(None, proxy_sources):
if not k:
continue
self.proxy_url = k.strip()
# Allows overriding the proxy on a per-request basis
if proxy_override is not None:
proxy_args['httpProxy'] = proxy_override
if proxy_args:
self.proxy = SeleniumProxy(raw=proxy_args)
def run(self,
url,
@@ -58,7 +59,9 @@ class fetcher(Fetcher):
is_binary=False,
empty_pages_are_a_change=False):
from selenium import webdriver
from selenium.webdriver.chrome.options import Options as ChromeOptions
from selenium.common.exceptions import WebDriverException
# request_body, request_method unused for now, until some magic in the future happens.
options = ChromeOptions()
@@ -73,62 +76,58 @@ class fetcher(Fetcher):
for opt in CHROME_OPTIONS:
options.add_argument(opt)
# 1. proxy_config /Proxy(proxy_config) selenium object is REALLY unreliable
# 2. selenium-wire can't be used because the websocket version conflicts with pyppeteer-ng
# 3. selenium only allows ONE runner at a time by default!
# 4. driver must use quit() or it will continue to block/hold the selenium process!!
options.add_argument(f"--proxy-server={self.proxy}")
if self.proxy_url:
options.add_argument(f'--proxy-server={self.proxy_url}')
from selenium.webdriver.remote.remote_connection import RemoteConnection
from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver
driver = None
try:
# Create the RemoteConnection and set timeout (e.g., 30 seconds)
remote_connection = RemoteConnection(
self.browser_connection_url,
)
remote_connection.set_timeout(30) # seconds
# Now create the driver with the RemoteConnection
driver = RemoteWebDriver(
command_executor=remote_connection,
options=options
)
driver.set_page_load_timeout(int(os.getenv("WEBDRIVER_PAGELOAD_TIMEOUT", 45)))
except Exception as e:
if driver:
driver.quit()
raise e
self.driver = webdriver.Remote(
command_executor=self.browser_connection_url,
options=options)
try:
driver.get(url)
self.driver.get(url)
except WebDriverException as e:
# Be sure we close the session window
self.quit()
raise
if not "--window-size" in os.getenv("CHROME_OPTIONS", ""):
driver.set_window_size(1280, 1024)
if not "--window-size" in os.getenv("CHROME_OPTIONS", ""):
self.driver.set_window_size(1280, 1024)
driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
if self.webdriver_js_execute_code is not None:
driver.execute_script(self.webdriver_js_execute_code)
# Selenium doesn't automatically wait for actions as well as Playwright does, so wait again
driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
if self.webdriver_js_execute_code is not None:
self.driver.execute_script(self.webdriver_js_execute_code)
# Selenium doesn't automatically wait for actions as well as Playwright does, so wait again
self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
# @todo - how to check this? is it possible?
self.status_code = 200
# @todo somehow we should try to get this working for WebDriver
# raise EmptyReply(url=url, status_code=r.status_code)
# @todo - dom wait loaded?
time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
self.content = driver.page_source
self.headers = {}
self.screenshot = driver.get_screenshot_as_png()
except Exception as e:
driver.quit()
raise e
# @todo - how to check this? is it possible?
self.status_code = 200
# @todo somehow we should try to get this working for WebDriver
# raise EmptyReply(url=url, status_code=r.status_code)
driver.quit()
# @todo - dom wait loaded?
time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
self.content = self.driver.page_source
self.headers = {}
self.screenshot = self.driver.get_screenshot_as_png()
# Does the connection to the webdriver work? run a test connection.
def is_ready(self):
from selenium import webdriver
from selenium.webdriver.chrome.options import Options as ChromeOptions
self.driver = webdriver.Remote(
command_executor=self.command_executor,
options=ChromeOptions())
# driver.quit() seems to cause better exceptions
self.quit()
return True
def quit(self, watch=None):
if self.driver:
try:
self.driver.quit()
except Exception as e:
logger.debug(f"Content Fetcher > Exception in chrome shutdown/quit {str(e)}")

View File

@@ -224,37 +224,27 @@ class StringDictKeyValue(StringField):
def _value(self):
if self.data:
output = ''
for k, v in self.data.items():
output += f"{k}: {v}\r\n"
output = u''
for k in self.data.keys():
output += "{}: {}\r\n".format(k, self.data[k])
return output
else:
return ''
return u''
# incoming data processing + validation
# incoming
def process_formdata(self, valuelist):
self.data = {}
errors = []
if valuelist:
# Remove empty strings (blank lines)
cleaned = [line.strip() for line in valuelist[0].split("\n") if line.strip()]
for idx, s in enumerate(cleaned, start=1):
if ':' not in s:
errors.append(f"Line {idx} is missing a ':' separator.")
continue
parts = s.split(':', 1)
key = parts[0].strip()
value = parts[1].strip()
self.data = {}
# Remove empty strings
cleaned = list(filter(None, valuelist[0].split("\n")))
for s in cleaned:
parts = s.strip().split(':', 1)
if len(parts) == 2:
self.data.update({parts[0].strip(): parts[1].strip()})
if not key:
errors.append(f"Line {idx} has an empty key.")
if not value:
errors.append(f"Line {idx} has an empty value.")
self.data[key] = value
if errors:
raise ValidationError("Invalid input:\n" + "\n".join(errors))
else:
self.data = {}
class ValidateContentFetcherIsReady(object):
"""

View File

@@ -100,7 +100,8 @@ docker run --network changedet-network \
test-changedetectionio \
bash -c 'cd changedetectionio && FAST_PUPPETEER_CHROME_FETCHER=1 PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py'
# Selenium
# Selenium - todo - fix proxies
docker run --network changedet-network \
-e "WEBDRIVER_URL=http://selenium:4444/wd/hub" \
test-changedetectionio \
bash -c 'cd changedetectionio && WEBDRIVER_URL=http://selenium:4444/wd/hub pytest tests/proxy_list/test_proxy_noconnect.py'
bash -c 'cd changedetectionio && pytest tests/proxy_list/test_proxy_noconnect.py'

View File

@@ -7,11 +7,6 @@ from ... import strtobool
# Just to be sure the UI outputs the right error message when the proxy connection fails
# docker run -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4
# PLAYWRIGHT_DRIVER_URL=ws://127.0.0.1:3000 pytest tests/proxy_list/test_proxy_noconnect.py
# FAST_PUPPETEER_CHROME_FETCHER=True PLAYWRIGHT_DRIVER_URL=ws://127.0.0.1:3000 pytest tests/proxy_list/test_proxy_noconnect.py
# WEBDRIVER_URL=http://127.0.0.1:4444/wd/hub pytest tests/proxy_list/test_proxy_noconnect.py
def test_proxy_noconnect_custom(client, live_server, measure_memory_usage):
live_server_setup(live_server)
@@ -21,48 +16,38 @@ def test_proxy_noconnect_custom(client, live_server, measure_memory_usage):
data={
"requests-time_between_check-minutes": 180,
"application-ignore_whitespace": "y",
"application-fetch_backend": 'html_webdriver' if os.getenv('PLAYWRIGHT_DRIVER_URL') or os.getenv("WEBDRIVER_URL") else 'html_requests',
"application-fetch_backend": 'html_webdriver' if os.getenv('PLAYWRIGHT_DRIVER_URL') else 'html_requests',
"requests-extra_proxies-0-proxy_name": "custom-test-proxy",
# test:awesome is set in tests/proxy_list/squid-passwords.txt
"requests-extra_proxies-0-proxy_url": "http://127.0.0.1:3128",
"requests-extra_proxies-0-proxy_url": "http://THISPROXYDOESNTEXIST:3128",
},
follow_redirects=True
)
assert b"Settings updated." in res.data
test_url = "https://changedetection.io"
res = client.post(
url_for("ui.ui_views.form_quick_watch_add"),
data={"url": test_url, "tags": '', 'edit_and_watch_submit_button': 'Edit > Watch'},
url_for("imports.import_page"),
# Because a URL won't show in squid/proxy logs due to it being SSL-encrypted
# Use plain HTTP or a specific domain-name here
data={"urls": "https://changedetection.io/CHANGELOG.txt"},
follow_redirects=True
)
assert b"Watch added in Paused state, saving will unpause" in res.data
options = {
"url": test_url,
"fetch_backend": "html_webdriver" if os.getenv('PLAYWRIGHT_DRIVER_URL') or os.getenv("WEBDRIVER_URL") else "html_requests",
"proxy": "ui-0custom-test-proxy",
}
res = client.post(
url_for("ui.ui_edit.edit_page", uuid="first", unpause_on_save=1),
data=options,
follow_redirects=True
)
assert b"unpaused" in res.data
import time
assert b"1 Imported" in res.data
wait_for_all_checks(client)
# Requests default
check_string = b'Cannot connect to proxy'
res = client.get(url_for("watchlist.index"))
assert b'Page.goto: net::ERR_PROXY_CONNECTION_FAILED' in res.data
if os.getenv('PLAYWRIGHT_DRIVER_URL') or strtobool(os.getenv('FAST_PUPPETEER_CHROME_FETCHER', 'False')) or os.getenv("WEBDRIVER_URL"):
# Requests
check_string = b'Proxy connection failed?'
if os.getenv('PLAYWRIGHT_DRIVER_URL') or strtobool(os.getenv('FAST_PUPPETEER_CHROME_FETCHER', 'False')):
check_string = b'ERR_PROXY_CONNECTION_FAILED'
if os.getenv("WEBDRIVER_URL"):
check_string = b'ERR_PROXY_CONNECTION_FAILED'
res = client.get(url_for("watchlist.index"))
#with open("/tmp/debug.html", 'wb') as f:
# f.write(res.data)
assert check_string in res.data

View File

@@ -424,27 +424,3 @@ def test_headers_textfile_in_request(client, live_server, measure_memory_usage):
# unlink headers.txt on start/stop
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
def test_headers_validation(client, live_server):
#live_server_setup(live_server)
test_url = url_for('test_headers', _external=True)
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
res = client.post(
url_for("ui.ui_edit.edit_page", uuid="first"),
data={
"url": test_url,
"fetch_backend": 'html_requests',
"headers": "User-AGent agent-from-watch\r\nsadfsadfsadfsdaf\r\n:foobar"},
follow_redirects=True
)
assert b"Line 1 is missing a &#39;:&#39; separator." in res.data
assert b"Line 3 has an empty key." in res.data

View File

@@ -136,7 +136,7 @@ def wait_for_all_checks(client):
res = client.get(url_for("watchlist.index"))
if not b'Checking now' in res.data:
break
logging.getLogger().info(f"Waiting for watch-list to not say 'Checking now'.. {attempt}")
logging.getLogger().info("Waiting for watch-list to not say 'Checking now'.. {}".format(attempt))
time.sleep(1)
attempt += 1

View File

@@ -53,7 +53,8 @@ lxml >=4.8.0,<6,!=5.2.0,!=5.2.1
# XPath 2.0-3.1 support - 4.2.0 broke something?
elementpath==4.1.5
selenium~=4.31.0
selenium==4.31.0
# https://github.com/pallets/werkzeug/issues/2985
# Maybe related to pytest?
@@ -70,7 +71,7 @@ jq~=1.3; python_version >= "3.8" and sys_platform == "linux"
# playwright is installed at Dockerfile build time because it's not available on all platforms
pyppeteer-ng==2.0.0rc10
pyppeteer-ng==2.0.0rc9
pyppeteerstealth>=0.0.4