mirror of
				https://github.com/dgtlmoon/changedetection.io.git
				synced 2025-11-04 00:27:48 +00:00 
			
		
		
		
	Compare commits
	
		
			19 Commits
		
	
	
		
			puppeteer-
			...
			playwright
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						 | 
					38fef78664 | ||
| 
						 | 
					83d9c2c614 | ||
| 
						 | 
					a4ffd8e86c | ||
| 
						 | 
					00279219c7 | ||
| 
						 | 
					a7d4af52ca | ||
| 
						 | 
					88973b7408 | ||
| 
						 | 
					d8fbf4fbda | ||
| 
						 | 
					e08bd6e279 | ||
| 
						 | 
					ecff0c4ec5 | ||
| 
						 | 
					bcb703cad4 | ||
| 
						 | 
					69817f2fd9 | ||
| 
						 | 
					85b8526d81 | ||
| 
						 | 
					bd302e1dd9 | ||
| 
						 | 
					cfdbecea63 | ||
| 
						 | 
					c8ac19e15b | ||
| 
						 | 
					f57c45f362 | ||
| 
						 | 
					1f9bbef021 | ||
| 
						 | 
					cdb0a22979 | ||
| 
						 | 
					2d9ff7821c | 
@@ -51,6 +51,7 @@ class BrowserStepsStepException(Exception):
 | 
			
		||||
        return
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# @todo - make base Exception class that announces via logger()
 | 
			
		||||
class PageUnloadable(Exception):
 | 
			
		||||
    def __init__(self, status_code, url, message, screenshot=False):
 | 
			
		||||
        # Set this so we can use it in other parts of the app
 | 
			
		||||
@@ -389,10 +390,24 @@ class base_html_playwright(Fetcher):
 | 
			
		||||
            raise PageUnloadable(url=url, status_code=None, message=f"Timed out connecting to browserless, retrying..")
 | 
			
		||||
        else:
 | 
			
		||||
            # 200 Here means that the communication to browserless worked only, not the page state
 | 
			
		||||
            if response.status_code == 200:
 | 
			
		||||
            try:
 | 
			
		||||
                x = response.json()
 | 
			
		||||
            except Exception as e:
 | 
			
		||||
                raise PageUnloadable(url=url, message="Error reading JSON response from browserless")
 | 
			
		||||
 | 
			
		||||
            try:
 | 
			
		||||
                self.status_code = response.status_code
 | 
			
		||||
            except Exception as e:
 | 
			
		||||
                raise PageUnloadable(url=url, message="Error reading status_code code response from browserless")
 | 
			
		||||
 | 
			
		||||
            self.headers = x.get('headers')
 | 
			
		||||
 | 
			
		||||
            if self.status_code != 200 and not ignore_status_codes:
 | 
			
		||||
                raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, page_html=x.get('content',''))
 | 
			
		||||
 | 
			
		||||
            if self.status_code == 200:
 | 
			
		||||
                import base64
 | 
			
		||||
 | 
			
		||||
                x = response.json()
 | 
			
		||||
                if not x.get('screenshot'):
 | 
			
		||||
                    # https://github.com/puppeteer/puppeteer/blob/v1.0.0/docs/troubleshooting.md#tips
 | 
			
		||||
                    # https://github.com/puppeteer/puppeteer/issues/1834
 | 
			
		||||
@@ -403,16 +418,10 @@ class base_html_playwright(Fetcher):
 | 
			
		||||
                if not x.get('content', '').strip():
 | 
			
		||||
                    raise EmptyReply(url=url, status_code=None)
 | 
			
		||||
 | 
			
		||||
                if x.get('status_code', 200) != 200 and not ignore_status_codes:
 | 
			
		||||
                    raise Non200ErrorCodeReceived(url=url, status_code=x.get('status_code', 200), page_html=x['content'])
 | 
			
		||||
 | 
			
		||||
                self.content = x.get('content')
 | 
			
		||||
                self.headers = x.get('headers')
 | 
			
		||||
                self.instock_data = x.get('instock_data')
 | 
			
		||||
                self.screenshot = base64.b64decode(x.get('screenshot'))
 | 
			
		||||
                self.status_code = x.get('status_code')
 | 
			
		||||
                self.xpath_data = x.get('xpath_data')
 | 
			
		||||
 | 
			
		||||
            else:
 | 
			
		||||
                # Some other error from browserless
 | 
			
		||||
                raise PageUnloadable(url=url, status_code=None, message=response.content.decode('utf-8'))
 | 
			
		||||
@@ -742,6 +751,8 @@ class html_requests(Fetcher):
 | 
			
		||||
                if encoding:
 | 
			
		||||
                    r.encoding = encoding
 | 
			
		||||
 | 
			
		||||
        self.headers = r.headers
 | 
			
		||||
 | 
			
		||||
        if not r.content or not len(r.content):
 | 
			
		||||
            raise EmptyReply(url=url, status_code=r.status_code)
 | 
			
		||||
 | 
			
		||||
@@ -758,7 +769,7 @@ class html_requests(Fetcher):
 | 
			
		||||
        else:
 | 
			
		||||
            self.content = r.text
 | 
			
		||||
 | 
			
		||||
        self.headers = r.headers
 | 
			
		||||
 | 
			
		||||
        self.raw_content = r.content
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -56,6 +56,7 @@ base_config = {
 | 
			
		||||
    'previous_md5': False,
 | 
			
		||||
    'previous_md5_before_filters': False,  # Used for skipping changedetection entirely
 | 
			
		||||
    'proxy': None,  # Preferred proxy connection
 | 
			
		||||
    'remote_server_reply': None, # From 'server' reply header
 | 
			
		||||
    'subtractive_selectors': [],
 | 
			
		||||
    'tag': '', # Old system of text name for a tag, to be removed
 | 
			
		||||
    'tags': [], # list of UUIDs to App.Tags
 | 
			
		||||
 
 | 
			
		||||
@@ -255,6 +255,7 @@ class ChangeDetectionStore:
 | 
			
		||||
                'last_viewed': 0,
 | 
			
		||||
                'previous_md5': False,
 | 
			
		||||
                'previous_md5_before_filters': False,
 | 
			
		||||
                'remote_server_reply': None,
 | 
			
		||||
                'track_ldjson_price_data': None,
 | 
			
		||||
            })
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -10,7 +10,7 @@ def test_setup(live_server):
 | 
			
		||||
# Hard to just add more live server URLs when one test is already running (I think)
 | 
			
		||||
# So we add our test here (was in a different file)
 | 
			
		||||
def test_headers_in_request(client, live_server):
 | 
			
		||||
    #live_server_setup(live_server)
 | 
			
		||||
    #ve_server_setup(live_server)
 | 
			
		||||
    # Add our URL to the import page
 | 
			
		||||
    test_url = url_for('test_headers', _external=True)
 | 
			
		||||
    if os.getenv('PLAYWRIGHT_DRIVER_URL'):
 | 
			
		||||
@@ -70,16 +70,17 @@ def test_headers_in_request(client, live_server):
 | 
			
		||||
 | 
			
		||||
    wait_for_all_checks(client)
 | 
			
		||||
 | 
			
		||||
    # Re #137 -  Examine the JSON index file, it should have only one set of headers entered
 | 
			
		||||
    # Re #137 -  It should have only one set of headers entered
 | 
			
		||||
    watches_with_headers = 0
 | 
			
		||||
    with open('test-datastore/url-watches.json') as f:
 | 
			
		||||
        app_struct = json.load(f)
 | 
			
		||||
        for uuid in app_struct['watching']:
 | 
			
		||||
            if (len(app_struct['watching'][uuid]['headers'])):
 | 
			
		||||
    for k, watch in client.application.config.get('DATASTORE').data.get('watching').items():
 | 
			
		||||
            if (len(watch['headers'])):
 | 
			
		||||
                watches_with_headers += 1
 | 
			
		||||
    assert watches_with_headers == 1
 | 
			
		||||
 | 
			
		||||
    # 'server' http header was automatically recorded
 | 
			
		||||
    for k, watch in client.application.config.get('DATASTORE').data.get('watching').items():
 | 
			
		||||
        assert 'custom' in watch.get('remote_server_reply') # added in util.py
 | 
			
		||||
 | 
			
		||||
    # Should be only one with headers set
 | 
			
		||||
    assert watches_with_headers==1
 | 
			
		||||
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
 | 
			
		||||
    assert b'Deleted' in res.data
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -175,12 +175,16 @@ def live_server_setup(live_server):
 | 
			
		||||
    @live_server.app.route('/test-headers')
 | 
			
		||||
    def test_headers():
 | 
			
		||||
 | 
			
		||||
        output= []
 | 
			
		||||
        output = []
 | 
			
		||||
 | 
			
		||||
        for header in request.headers:
 | 
			
		||||
             output.append("{}:{}".format(str(header[0]),str(header[1])   ))
 | 
			
		||||
            output.append("{}:{}".format(str(header[0]), str(header[1])))
 | 
			
		||||
 | 
			
		||||
        return "\n".join(output)
 | 
			
		||||
        content = "\n".join(output)
 | 
			
		||||
 | 
			
		||||
        resp = make_response(content, 200)
 | 
			
		||||
        resp.headers['server'] = 'custom'
 | 
			
		||||
        return resp
 | 
			
		||||
 | 
			
		||||
    # Just return the body in the request
 | 
			
		||||
    @live_server.app.route('/test-body', methods=['POST', 'GET'])
 | 
			
		||||
 
 | 
			
		||||
@@ -491,6 +491,16 @@ class update_worker(threading.Thread):
 | 
			
		||||
                    if self.datastore.data['watching'].get(uuid):
 | 
			
		||||
                        # Always record that we at least tried
 | 
			
		||||
                        count = self.datastore.data['watching'][uuid].get('check_count', 0) + 1
 | 
			
		||||
 | 
			
		||||
                        # Record the 'server' header reply, can be used for actions in the future like cloudflare/akamai workarounds
 | 
			
		||||
                        try:
 | 
			
		||||
                            server_header = update_handler.fetcher.headers.get('server', '').strip().lower()[:255]
 | 
			
		||||
                            self.datastore.update_watch(uuid=uuid,
 | 
			
		||||
                                                        update_obj={'remote_server_reply': server_header}
 | 
			
		||||
                                                        )
 | 
			
		||||
                        except Exception as e:
 | 
			
		||||
                            pass
 | 
			
		||||
 | 
			
		||||
                        self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
 | 
			
		||||
                                                                           'last_checked': round(time.time()),
 | 
			
		||||
                                                                           'check_count': count
 | 
			
		||||
 
 | 
			
		||||
@@ -94,7 +94,8 @@ services:
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
     # Used for fetching pages via Playwright+Chrome where you need Javascript support.
 | 
			
		||||
     # Note: works well but is deprecated, does not fetch full page screenshots (doesn't work with Visual Selector) and other issues
 | 
			
		||||
     # Note: Works well but is deprecated, does not fetch full page screenshots (doesn't work with Visual Selector)
 | 
			
		||||
     #       Does not report status codes (200, 404, 403) and other issues
 | 
			
		||||
     # More information about the advantages of playwright/browserless https://www.browserless.io/blog/2023/12/13/migrating-selenium-to-playwright/
 | 
			
		||||
#    browser-chrome:
 | 
			
		||||
#        hostname: browser-chrome
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user