mirror of
				https://github.com/dgtlmoon/changedetection.io.git
				synced 2025-11-04 08:34:57 +00:00 
			
		
		
		
	Compare commits
	
		
			2 Commits
		
	
	
		
			0.46.01
			...
			fetch-reli
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						 | 
					6788796788 | ||
| 
						 | 
					efafc9bef8 | 
@@ -42,7 +42,7 @@ class BrowserStepsStepTimout(Exception):
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class PageUnloadable(Exception):
 | 
			
		||||
    def __init__(self, status_code, url, screenshot=False, message=False):
 | 
			
		||||
    def __init__(self, status_code, url, message, screenshot=False):
 | 
			
		||||
        # Set this so we can use it in other parts of the app
 | 
			
		||||
        self.status_code = status_code
 | 
			
		||||
        self.url = url
 | 
			
		||||
@@ -299,23 +299,34 @@ class base_html_playwright(Fetcher):
 | 
			
		||||
            if len(request_headers):
 | 
			
		||||
                context.set_extra_http_headers(request_headers)
 | 
			
		||||
 | 
			
		||||
            try:
 | 
			
		||||
                self.page.set_default_navigation_timeout(90000)
 | 
			
		||||
                self.page.set_default_timeout(90000)
 | 
			
		||||
 | 
			
		||||
                # Listen for all console events and handle errors
 | 
			
		||||
                self.page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))
 | 
			
		||||
 | 
			
		||||
                # Bug - never set viewport size BEFORE page.goto
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
                # Waits for the next navigation. Using Python context manager
 | 
			
		||||
                # prevents a race condition between clicking and waiting for a navigation.
 | 
			
		||||
                response = self.page.goto(url, wait_until='commit')
 | 
			
		||||
            # Goto page
 | 
			
		||||
            try:
 | 
			
		||||
                # Wait_until = commit
 | 
			
		||||
                # - `'commit'` - consider operation to be finished when network response is received and the document started loading.
 | 
			
		||||
                # Better to not use any smarts from Playwright and just wait an arbitrary number of seconds
 | 
			
		||||
                # This seemed to solve nearly all 'TimeoutErrors'
 | 
			
		||||
                response = self.page.goto(url, wait_until='commit')
 | 
			
		||||
            except playwright._impl._api_types.Error as e:
 | 
			
		||||
                # Retry once - https://github.com/browserless/chrome/issues/2485
 | 
			
		||||
                # Sometimes errors related to invalid cert's and other can be random
 | 
			
		||||
                print ("Content Fetcher > retrying request got error - ", str(e))
 | 
			
		||||
                time.sleep(1)
 | 
			
		||||
                response = self.page.goto(url, wait_until='commit')
 | 
			
		||||
 | 
			
		||||
            except Exception as e:
 | 
			
		||||
                print ("Content Fetcher > Other exception when page.goto", str(e))
 | 
			
		||||
                context.close()
 | 
			
		||||
                browser.close()
 | 
			
		||||
                raise PageUnloadable(url=url, status_code=None, message=str(e))
 | 
			
		||||
 | 
			
		||||
            # Execute any browser steps
 | 
			
		||||
            try:
 | 
			
		||||
                extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
 | 
			
		||||
                self.page.wait_for_timeout(extra_wait * 1000)
 | 
			
		||||
 | 
			
		||||
@@ -328,17 +339,15 @@ class base_html_playwright(Fetcher):
 | 
			
		||||
                # This can be ok, we will try to grab what we could retrieve
 | 
			
		||||
                pass
 | 
			
		||||
            except Exception as e:
 | 
			
		||||
                print ("other exception when page.goto")
 | 
			
		||||
                print (str(e))
 | 
			
		||||
                print ("Content Fetcher > Other exception when executing custom JS code", str(e))
 | 
			
		||||
                context.close()
 | 
			
		||||
                browser.close()
 | 
			
		||||
                raise PageUnloadable(url=url, status_code=None)
 | 
			
		||||
 | 
			
		||||
                raise PageUnloadable(url=url, status_code=None, message=str(e))
 | 
			
		||||
 | 
			
		||||
            if response is None:
 | 
			
		||||
                context.close()
 | 
			
		||||
                browser.close()
 | 
			
		||||
                print ("response object was none")
 | 
			
		||||
                print ("Content Fetcher > Response object was none")
 | 
			
		||||
                raise EmptyReply(url=url, status_code=None)
 | 
			
		||||
 | 
			
		||||
            # Bug 2(?) Set the viewport size AFTER loading the page
 | 
			
		||||
@@ -357,7 +366,7 @@ class base_html_playwright(Fetcher):
 | 
			
		||||
            if len(self.page.content().strip()) == 0:
 | 
			
		||||
                context.close()
 | 
			
		||||
                browser.close()
 | 
			
		||||
                print ("Content was empty")
 | 
			
		||||
                print ("Content Fetcher > Content was empty")
 | 
			
		||||
                raise EmptyReply(url=url, status_code=None)
 | 
			
		||||
 | 
			
		||||
            # Bug 2(?) Set the viewport size AFTER loading the page
 | 
			
		||||
@@ -502,7 +511,7 @@ class base_html_webdriver(Fetcher):
 | 
			
		||||
            try:
 | 
			
		||||
                self.driver.quit()
 | 
			
		||||
            except Exception as e:
 | 
			
		||||
                print("Exception in chrome shutdown/quit" + str(e))
 | 
			
		||||
                print("Content Fetcher > Exception in chrome shutdown/quit" + str(e))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# "html_requests" is listed as the default fetcher in store.py!
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user