mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2025-10-31 22:57:18 +00:00
Compare commits
2 Commits
1314-pip-e
...
fetch-reli
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6788796788 | ||
|
|
efafc9bef8 |
@@ -42,7 +42,7 @@ class BrowserStepsStepTimout(Exception):
|
|||||||
|
|
||||||
|
|
||||||
class PageUnloadable(Exception):
|
class PageUnloadable(Exception):
|
||||||
def __init__(self, status_code, url, screenshot=False, message=False):
|
def __init__(self, status_code, url, message, screenshot=False):
|
||||||
# Set this so we can use it in other parts of the app
|
# Set this so we can use it in other parts of the app
|
||||||
self.status_code = status_code
|
self.status_code = status_code
|
||||||
self.url = url
|
self.url = url
|
||||||
@@ -299,23 +299,34 @@ class base_html_playwright(Fetcher):
|
|||||||
if len(request_headers):
|
if len(request_headers):
|
||||||
context.set_extra_http_headers(request_headers)
|
context.set_extra_http_headers(request_headers)
|
||||||
|
|
||||||
try:
|
|
||||||
self.page.set_default_navigation_timeout(90000)
|
self.page.set_default_navigation_timeout(90000)
|
||||||
self.page.set_default_timeout(90000)
|
self.page.set_default_timeout(90000)
|
||||||
|
|
||||||
# Listen for all console events and handle errors
|
# Listen for all console events and handle errors
|
||||||
self.page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))
|
self.page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))
|
||||||
|
|
||||||
# Bug - never set viewport size BEFORE page.goto
|
# Goto page
|
||||||
|
try:
|
||||||
|
|
||||||
# Waits for the next navigation. Using Python context manager
|
|
||||||
# prevents a race condition between clicking and waiting for a navigation.
|
|
||||||
response = self.page.goto(url, wait_until='commit')
|
|
||||||
# Wait_until = commit
|
# Wait_until = commit
|
||||||
# - `'commit'` - consider operation to be finished when network response is received and the document started loading.
|
# - `'commit'` - consider operation to be finished when network response is received and the document started loading.
|
||||||
# Better to not use any smarts from Playwright and just wait an arbitrary number of seconds
|
# Better to not use any smarts from Playwright and just wait an arbitrary number of seconds
|
||||||
# This seemed to solve nearly all 'TimeoutErrors'
|
# This seemed to solve nearly all 'TimeoutErrors'
|
||||||
|
response = self.page.goto(url, wait_until='commit')
|
||||||
|
except playwright._impl._api_types.Error as e:
|
||||||
|
# Retry once - https://github.com/browserless/chrome/issues/2485
|
||||||
|
# Sometimes errors related to invalid certs and other causes can be random
|
||||||
|
print ("Content Fetcher > retrying request got error - ", str(e))
|
||||||
|
time.sleep(1)
|
||||||
|
response = self.page.goto(url, wait_until='commit')
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print ("Content Fetcher > Other exception when page.goto", str(e))
|
||||||
|
context.close()
|
||||||
|
browser.close()
|
||||||
|
raise PageUnloadable(url=url, status_code=None, message=str(e))
|
||||||
|
|
||||||
|
# Execute any browser steps
|
||||||
|
try:
|
||||||
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
|
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
|
||||||
self.page.wait_for_timeout(extra_wait * 1000)
|
self.page.wait_for_timeout(extra_wait * 1000)
|
||||||
|
|
||||||
@@ -328,17 +339,15 @@ class base_html_playwright(Fetcher):
|
|||||||
# This can be ok, we will try to grab what we could retrieve
|
# This can be ok, we will try to grab what we could retrieve
|
||||||
pass
|
pass
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print ("other exception when page.goto")
|
print ("Content Fetcher > Other exception when executing custom JS code", str(e))
|
||||||
print (str(e))
|
|
||||||
context.close()
|
context.close()
|
||||||
browser.close()
|
browser.close()
|
||||||
raise PageUnloadable(url=url, status_code=None)
|
raise PageUnloadable(url=url, status_code=None, message=str(e))
|
||||||
|
|
||||||
|
|
||||||
if response is None:
|
if response is None:
|
||||||
context.close()
|
context.close()
|
||||||
browser.close()
|
browser.close()
|
||||||
print ("response object was none")
|
print ("Content Fetcher > Response object was none")
|
||||||
raise EmptyReply(url=url, status_code=None)
|
raise EmptyReply(url=url, status_code=None)
|
||||||
|
|
||||||
# Bug 2(?) Set the viewport size AFTER loading the page
|
# Bug 2(?) Set the viewport size AFTER loading the page
|
||||||
@@ -357,7 +366,7 @@ class base_html_playwright(Fetcher):
|
|||||||
if len(self.page.content().strip()) == 0:
|
if len(self.page.content().strip()) == 0:
|
||||||
context.close()
|
context.close()
|
||||||
browser.close()
|
browser.close()
|
||||||
print ("Content was empty")
|
print ("Content Fetcher > Content was empty")
|
||||||
raise EmptyReply(url=url, status_code=None)
|
raise EmptyReply(url=url, status_code=None)
|
||||||
|
|
||||||
# Bug 2(?) Set the viewport size AFTER loading the page
|
# Bug 2(?) Set the viewport size AFTER loading the page
|
||||||
@@ -502,7 +511,7 @@ class base_html_webdriver(Fetcher):
|
|||||||
try:
|
try:
|
||||||
self.driver.quit()
|
self.driver.quit()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("Exception in chrome shutdown/quit" + str(e))
|
print("Content Fetcher > Exception in chrome shutdown/quit" + str(e))
|
||||||
|
|
||||||
|
|
||||||
# "html_requests" is listed as the default fetcher in store.py!
|
# "html_requests" is listed as the default fetcher in store.py!
|
||||||
|
|||||||
Reference in New Issue
Block a user