mirror of
				https://github.com/dgtlmoon/changedetection.io.git
				synced 2025-10-31 06:37:41 +00:00 
			
		
		
		
	Compare commits
	
		
			19 Commits
		
	
	
		
			conditions
			...
			playwright
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | 38fef78664 | ||
|   | 83d9c2c614 | ||
|   | a4ffd8e86c | ||
|   | 00279219c7 | ||
|   | a7d4af52ca | ||
|   | 88973b7408 | ||
|   | d8fbf4fbda | ||
|   | e08bd6e279 | ||
|   | ecff0c4ec5 | ||
|   | bcb703cad4 | ||
|   | 69817f2fd9 | ||
|   | 85b8526d81 | ||
|   | bd302e1dd9 | ||
|   | cfdbecea63 | ||
|   | c8ac19e15b | ||
|   | f57c45f362 | ||
|   | 1f9bbef021 | ||
|   | cdb0a22979 | ||
|   | 2d9ff7821c | 
| @@ -51,6 +51,7 @@ class BrowserStepsStepException(Exception): | ||||
|         return | ||||
|  | ||||
|  | ||||
| # @todo - make base Exception class that announces via logger() | ||||
| class PageUnloadable(Exception): | ||||
|     def __init__(self, status_code, url, message, screenshot=False): | ||||
|         # Set this so we can use it in other parts of the app | ||||
| @@ -389,10 +390,24 @@ class base_html_playwright(Fetcher): | ||||
|             raise PageUnloadable(url=url, status_code=None, message=f"Timed out connecting to browserless, retrying..") | ||||
|         else: | ||||
|             # A 200 here only means that the communication with browserless worked; it says nothing about the page state | ||||
|             if response.status_code == 200: | ||||
|             try: | ||||
|                 x = response.json() | ||||
|             except Exception as e: | ||||
|                 raise PageUnloadable(url=url, message="Error reading JSON response from browserless") | ||||
|  | ||||
|             try: | ||||
|                 self.status_code = response.status_code | ||||
|             except Exception as e: | ||||
|                 raise PageUnloadable(url=url, message="Error reading status_code code response from browserless") | ||||
|  | ||||
|             self.headers = x.get('headers') | ||||
|  | ||||
|             if self.status_code != 200 and not ignore_status_codes: | ||||
|                 raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, page_html=x.get('content','')) | ||||
|  | ||||
|             if self.status_code == 200: | ||||
|                 import base64 | ||||
|  | ||||
|                 x = response.json() | ||||
|                 if not x.get('screenshot'): | ||||
|                     # https://github.com/puppeteer/puppeteer/blob/v1.0.0/docs/troubleshooting.md#tips | ||||
|                     # https://github.com/puppeteer/puppeteer/issues/1834 | ||||
| @@ -403,16 +418,10 @@ class base_html_playwright(Fetcher): | ||||
|                 if not x.get('content', '').strip(): | ||||
|                     raise EmptyReply(url=url, status_code=None) | ||||
|  | ||||
|                 if x.get('status_code', 200) != 200 and not ignore_status_codes: | ||||
|                     raise Non200ErrorCodeReceived(url=url, status_code=x.get('status_code', 200), page_html=x['content']) | ||||
|  | ||||
|                 self.content = x.get('content') | ||||
|                 self.headers = x.get('headers') | ||||
|                 self.instock_data = x.get('instock_data') | ||||
|                 self.screenshot = base64.b64decode(x.get('screenshot')) | ||||
|                 self.status_code = x.get('status_code') | ||||
|                 self.xpath_data = x.get('xpath_data') | ||||
|  | ||||
|             else: | ||||
|                 # Some other error from browserless | ||||
|                 raise PageUnloadable(url=url, status_code=None, message=response.content.decode('utf-8')) | ||||
| @@ -742,6 +751,8 @@ class html_requests(Fetcher): | ||||
|                 if encoding: | ||||
|                     r.encoding = encoding | ||||
|  | ||||
|         self.headers = r.headers | ||||
|  | ||||
|         if not r.content or not len(r.content): | ||||
|             raise EmptyReply(url=url, status_code=r.status_code) | ||||
|  | ||||
| @@ -758,7 +769,7 @@ class html_requests(Fetcher): | ||||
|         else: | ||||
|             self.content = r.text | ||||
|  | ||||
|         self.headers = r.headers | ||||
|  | ||||
|         self.raw_content = r.content | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -56,6 +56,7 @@ base_config = { | ||||
|     'previous_md5': False, | ||||
|     'previous_md5_before_filters': False,  # Used for skipping changedetection entirely | ||||
|     'proxy': None,  # Preferred proxy connection | ||||
|     'remote_server_reply': None, # From 'server' reply header | ||||
|     'subtractive_selectors': [], | ||||
|     'tag': '', # Old system of text name for a tag, to be removed | ||||
|     'tags': [], # list of UUIDs to App.Tags | ||||
|   | ||||
| @@ -255,6 +255,7 @@ class ChangeDetectionStore: | ||||
|                 'last_viewed': 0, | ||||
|                 'previous_md5': False, | ||||
|                 'previous_md5_before_filters': False, | ||||
|                 'remote_server_reply': None, | ||||
|                 'track_ldjson_price_data': None, | ||||
|             }) | ||||
|  | ||||
|   | ||||
| @@ -10,7 +10,7 @@ def test_setup(live_server): | ||||
| # Hard to just add more live server URLs when one test is already running (I think) | ||||
| # So we add our test here (was in a different file) | ||||
| def test_headers_in_request(client, live_server): | ||||
|     #live_server_setup(live_server) | ||||
|     #ve_server_setup(live_server) | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_headers', _external=True) | ||||
|     if os.getenv('PLAYWRIGHT_DRIVER_URL'): | ||||
| @@ -70,16 +70,17 @@ def test_headers_in_request(client, live_server): | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Re #137 -  Examine the JSON index file, it should have only one set of headers entered | ||||
|     # Re #137 -  It should have only one set of headers entered | ||||
|     watches_with_headers = 0 | ||||
|     with open('test-datastore/url-watches.json') as f: | ||||
|         app_struct = json.load(f) | ||||
|         for uuid in app_struct['watching']: | ||||
|             if (len(app_struct['watching'][uuid]['headers'])): | ||||
|     for k, watch in client.application.config.get('DATASTORE').data.get('watching').items(): | ||||
|             if (len(watch['headers'])): | ||||
|                 watches_with_headers += 1 | ||||
|     assert watches_with_headers == 1 | ||||
|  | ||||
|     # 'server' http header was automatically recorded | ||||
|     for k, watch in client.application.config.get('DATASTORE').data.get('watching').items(): | ||||
|         assert 'custom' in watch.get('remote_server_reply') # added in util.py | ||||
|  | ||||
|     # Should be only one with headers set | ||||
|     assert watches_with_headers==1 | ||||
|     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|  | ||||
|   | ||||
| @@ -175,12 +175,16 @@ def live_server_setup(live_server): | ||||
|     @live_server.app.route('/test-headers') | ||||
|     def test_headers(): | ||||
|  | ||||
|         output= [] | ||||
|         output = [] | ||||
|  | ||||
|         for header in request.headers: | ||||
|              output.append("{}:{}".format(str(header[0]),str(header[1])   )) | ||||
|             output.append("{}:{}".format(str(header[0]), str(header[1]))) | ||||
|  | ||||
|         return "\n".join(output) | ||||
|         content = "\n".join(output) | ||||
|  | ||||
|         resp = make_response(content, 200) | ||||
|         resp.headers['server'] = 'custom' | ||||
|         return resp | ||||
|  | ||||
|     # Just return the body in the request | ||||
|     @live_server.app.route('/test-body', methods=['POST', 'GET']) | ||||
|   | ||||
| @@ -491,6 +491,16 @@ class update_worker(threading.Thread): | ||||
|                     if self.datastore.data['watching'].get(uuid): | ||||
|                         # Always record that we at least tried | ||||
|                         count = self.datastore.data['watching'][uuid].get('check_count', 0) + 1 | ||||
|  | ||||
|                         # Record the 'server' header reply; it can be used in the future for actions such as Cloudflare/Akamai workarounds | ||||
|                         try: | ||||
|                             server_header = update_handler.fetcher.headers.get('server', '').strip().lower()[:255] | ||||
|                             self.datastore.update_watch(uuid=uuid, | ||||
|                                                         update_obj={'remote_server_reply': server_header} | ||||
|                                                         ) | ||||
|                         except Exception as e: | ||||
|                             pass | ||||
|  | ||||
|                         self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3), | ||||
|                                                                            'last_checked': round(time.time()), | ||||
|                                                                            'check_count': count | ||||
|   | ||||
| @@ -94,7 +94,8 @@ services: | ||||
| # | ||||
|  | ||||
|      # Used for fetching pages via Playwright+Chrome where you need Javascript support. | ||||
|      # Note: works well but is deprecated, does not fetch full page screenshots (doesnt work with Visual Selector) and other issues | ||||
|      # Note: Works well but is deprecated, does not fetch full page screenshots (doesn't work with Visual Selector) | ||||
|      #       Does not report status codes (200, 404, 403) and other issues | ||||
|      # More information about the advantages of playwright/browserless https://www.browserless.io/blog/2023/12/13/migrating-selenium-to-playwright/ | ||||
| #    browser-chrome: | ||||
| #        hostname: browser-chrome | ||||
|   | ||||
		Reference in New Issue
	
	Block a user