Compare commits

...

3 Commits

Author SHA1 Message Date
dgtlmoon
8578cc3582 Fix tuple 2022-10-09 18:10:24 +02:00
dgtlmoon
b72d6f8dec use brotli package 2022-10-09 18:10:14 +02:00
dgtlmoon
5b3f240846 Dont use default Requests user-agent and accept headers in playwright+selenium requests, breaks sites such as united.com. 2022-10-09 17:54:13 +02:00
4 changed files with 17 additions and 5 deletions

View File

@@ -575,6 +575,11 @@ class html_requests(Fetcher):
ignore_status_codes=False,
current_css_filter=None):
# Make requests use a more modern looking user-agent
if not 'User-Agent' in request_headers:
request_headers['User-Agent'] = os.getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT",
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36')
proxies = {}
# Allows overriding the proxy on a per-request basis

View File

@@ -13,10 +13,6 @@ class model(dict):
'watching': {},
'settings': {
'headers': {
'User-Agent': getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT", 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'),
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate', # No support for brolti in python requests yet.
'Accept-Language': 'en-GB,en-US;q=0.9,en;'
},
'requests': {
'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")), # Default 45 seconds

View File

@@ -575,3 +575,11 @@ class ChangeDetectionStore:
continue
return
# We incorrectly used common header overrides that should only apply to Requests.
# These are now handled in content_fetcher::html_requests and shouldn't be passed to Playwright/Selenium.
def update_7(self):
    """Drop the legacy hard-coded request headers from the stored settings.

    Removes 'User-Agent', 'Accept', 'Accept-Encoding' and 'Accept-Language'
    from ``self.data['settings']['headers']``. These were hard-coded in
    early versions. Any other header in that dict is left untouched.
    """
    headers = self.data['settings']['headers']
    for legacy_header in ('User-Agent', 'Accept', 'Accept-Encoding', 'Accept-Language'):
        # pop() with a default removes the key whenever it is present,
        # including when its stored value is empty, and is a no-op otherwise.
        headers.pop(legacy_header, None)

View File

@@ -10,7 +10,10 @@ flask_restful
pytz
# Set these versions together to avoid a RequestsDependencyWarning
requests[socks] ~= 2.26
# >= 2.26 also adds Brotli support if brotli is installed
brotli ~= 1.0
requests[socks] ~= 2.28
urllib3 > 1.26
chardet > 2.3.0