mirror of
				https://github.com/dgtlmoon/changedetection.io.git
				synced 2025-10-31 14:47:21 +00:00 
			
		
		
		
	Compare commits
	
		
			12 Commits
		
	
	
		
			test-speed
			...
			prefer-soc
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | cddc95193d | ||
|   | f45a1b485a | ||
|   | 1fe9501b4f | ||
|   | 0b7dacce3a | ||
|   | 44bf2c9a47 | ||
|   | 536791b0d5 | ||
|   | 04b7d98e6c | ||
|   | 5faa84474c | ||
|   | 37896002ff | ||
|   | 10690f7094 | ||
|   | eaadb5881f | ||
|   | 03976cd0e8 | 
							
								
								
									
										42
									
								
								.github/workflows/test-only.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										42
									
								
								.github/workflows/test-only.yml
									
									
									
									
										vendored
									
									
								
							| @@ -28,12 +28,12 @@ jobs: | ||||
|            | ||||
|           docker network create changedet-network | ||||
|            | ||||
|           # Selenium+browserless | ||||
|           # Selenium and sockpuppetbrowser | ||||
|           docker run --network changedet-network -d --hostname selenium  -p 4444:4444 --rm --shm-size="2g"  selenium/standalone-chrome:4 | ||||
|           docker run --network changedet-network -d --name browserless --hostname browserless -e "FUNCTION_BUILT_INS=[\"fs\",\"crypto\"]" -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm  -p 3000:3000  --shm-size="2g"  browserless/chrome:1.60-chrome-stable | ||||
|           docker run --network changedet-network -d --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest | ||||
|            | ||||
|           # For accessing custom browser tests | ||||
|           docker run --network changedet-network -d --name browserless-custom-url --hostname browserless-custom-url -e "FUNCTION_BUILT_INS=[\"fs\",\"crypto\"]" -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm --shm-size="2g"  browserless/chrome:1.60-chrome-stable | ||||
|           docker run --network changedet-network -d --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url --rm dgtlmoon/sockpuppetbrowser:latest | ||||
|  | ||||
|       - name: Build changedetection.io container for testing | ||||
|         run: |          | ||||
| @@ -47,6 +47,12 @@ jobs: | ||||
|           # Debug SMTP server/echo message back server | ||||
|           docker run --network changedet-network -d -p 11025:11025 -p 11080:11080  --hostname mailserver test-changedetectionio  bash -c 'python changedetectionio/tests/smtp/smtp-test-server.py'  | ||||
|  | ||||
|       - name: Show docker container state and other debug info | ||||
|         run: | | ||||
|           set -x | ||||
|           echo "Running processes in docker..." | ||||
|           docker ps | ||||
|  | ||||
|       - name: Test built container with Pytest (generally as requests/plaintext fetching) | ||||
|         run: | | ||||
|           # Unit tests | ||||
| @@ -63,43 +69,33 @@ jobs: | ||||
|  | ||||
|       - name: Specific tests in built container for Selenium | ||||
|         run: | | ||||
|            | ||||
|           # Selenium fetch | ||||
|           docker run --rm -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py' | ||||
|  | ||||
|       - name: Specific tests in built container for Playwright | ||||
|         run: |          | ||||
|           # Playwright/Browserless fetch | ||||
|           docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py && pytest tests/visualselector/test_fetch_data.py' | ||||
|       - name: Specific tests in built container for Playwright and SocketPuppetBrowser | ||||
|         run: | | ||||
|           # Playwright via Sockpuppetbrowser fetch | ||||
|           docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py && pytest tests/visualselector/test_fetch_data.py' | ||||
|  | ||||
|       - name: Specific tests in built container for headers and requests checks with Playwright | ||||
|         run: |                   | ||||
|           # Settings headers playwright tests - Call back in from Browserless, check headers | ||||
|           docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000?dumpio=true" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0  --live-server-port=5004 tests/test_request.py' | ||||
|         run: |        | ||||
|           # Settings headers playwright tests - Call back in from Sockpuppetbrowser, check headers | ||||
|           docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0  --live-server-port=5004 tests/test_request.py' | ||||
|  | ||||
|       - name: Specific tests in built container for headers and requests checks with Selenium | ||||
|         run: |                   | ||||
|         run: | | ||||
|           docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0  --live-server-port=5004 tests/test_request.py' | ||||
|  | ||||
|       - name: Specific tests in built container with Playwright as Puppeteer experimental fetcher | ||||
|         run: |                   | ||||
|           docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "USE_EXPERIMENTAL_PUPPETEER_FETCH=yes" -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000?dumpio=true" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0  --live-server-port=5004 tests/test_request.py'           | ||||
|  | ||||
|       - name: Test built container restock detection via Playwright | ||||
|         run: |                             | ||||
|           # restock detection via playwright - added name=changedet here so that playwright/browserless can connect to it | ||||
|           docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py' | ||||
|           # restock detection via playwright - added name=changedet here so that playwright and sockpuppetbrowser can connect to it | ||||
|           docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py' | ||||
|  | ||||
|       - name: Test SMTP notification mime types | ||||
|         run: | | ||||
|           # SMTP content types - needs the 'Debug SMTP server/echo message back server' container from above | ||||
|           docker run --rm  --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/smtp/test_notification_smtp.py' | ||||
|  | ||||
|       - name: Test with puppeteer fetcher and disk cache | ||||
|         run: | | ||||
|           docker run --rm -e "PUPPETEER_DISK_CACHE=/tmp/data/" -e "USE_EXPERIMENTAL_PUPPETEER_FETCH=yes" -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py && pytest tests/visualselector/test_fetch_data.py' | ||||
|           # Browserless would have had -e "FUNCTION_BUILT_INS=[\"fs\",\"crypto\"]" added above | ||||
|  | ||||
|       - name: Test proxy interaction | ||||
|         run: | | ||||
|           cd changedetectionio | ||||
|   | ||||
| @@ -4,22 +4,13 @@ | ||||
| # Why? | ||||
| # `browsersteps_playwright_browser_interface.chromium.connect_over_cdp()` will only run once without async() | ||||
| # - this flask app is not async() | ||||
| # - browserless has a single timeout/keepalive which applies to the session made at .connect_over_cdp() | ||||
| # - A single timeout/keepalive which applies to the session made at .connect_over_cdp() | ||||
| # | ||||
| # So it means that we must unfortunately for now just keep a single timer since .connect_over_cdp() was run | ||||
| # and know when that reaches timeout/keepalive :( when that time is up, restart the connection and tell the user | ||||
| # that their time is up, insert another coin. (reload) | ||||
| # | ||||
| # Bigger picture | ||||
| # - It's horrible that we have this click+wait deal, some nice socket.io solution using something similar | ||||
| # to what the browserless debug UI already gives us would be smarter.. | ||||
| # | ||||
| # OR | ||||
| # - Some API call that should be hacked into browserless or playwright that we can "/api/bump-keepalive/{session_id}/60" | ||||
| # So we can tell it that we need more time (run this on each action) | ||||
| # | ||||
| # OR | ||||
| # - use multiprocessing to bump this over to its own process and add some transport layer (queue/pipes) | ||||
|  | ||||
| from distutils.util import strtobool | ||||
| from flask import Blueprint, request, make_response | ||||
|   | ||||
| @@ -169,7 +169,7 @@ class steppable_browser_interface(): | ||||
|         self.page.locator(selector, timeout=1000).uncheck(timeout=1000) | ||||
|  | ||||
|  | ||||
| # Responsible for maintaining a live 'context' with browserless | ||||
| # Responsible for maintaining a live 'context' with the chrome CDP | ||||
| # @todo - how long do contexts live for anyway? | ||||
| class browsersteps_live_ui(steppable_browser_interface): | ||||
|     context = None | ||||
|   | ||||
| @@ -311,125 +311,6 @@ class base_html_playwright(Fetcher): | ||||
|         with open(destination, 'w') as f: | ||||
|             f.write(content) | ||||
|  | ||||
|     def run_fetch_browserless_puppeteer(self, | ||||
|             url, | ||||
|             timeout, | ||||
|             request_headers, | ||||
|             request_body, | ||||
|             request_method, | ||||
|             ignore_status_codes=False, | ||||
|             current_include_filters=None, | ||||
|             is_binary=False): | ||||
|  | ||||
|         from pkg_resources import resource_string | ||||
|  | ||||
|         extra_wait_ms = (int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay) * 1000 | ||||
|  | ||||
|         self.xpath_element_js = self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) | ||||
|         code = resource_string(__name__, "res/puppeteer_fetch.js").decode('utf-8') | ||||
|         # In the future inject this is a proper JS package | ||||
|         code = code.replace('%xpath_scrape_code%', self.xpath_element_js) | ||||
|         code = code.replace('%instock_scrape_code%', self.instock_data_js) | ||||
|  | ||||
|         from requests.exceptions import ConnectTimeout, ReadTimeout | ||||
|         wait_browserless_seconds = 240 | ||||
|  | ||||
|         browserless_function_url = os.getenv('BROWSERLESS_FUNCTION_URL') | ||||
|         from urllib.parse import urlparse | ||||
|         if not browserless_function_url: | ||||
|             # Convert/try to guess from PLAYWRIGHT_DRIVER_URL | ||||
|             o = urlparse(os.getenv('PLAYWRIGHT_DRIVER_URL')) | ||||
|             browserless_function_url = o._replace(scheme="http")._replace(path="function").geturl() | ||||
|  | ||||
|  | ||||
|         # Append proxy connect string | ||||
|         if self.proxy: | ||||
|             # Remove username/password if it exists in the URL or you will receive "ERR_NO_SUPPORTED_PROXIES" error | ||||
|             # Actual authentication handled by Puppeteer/node | ||||
|             o = urlparse(self.proxy.get('server')) | ||||
|             proxy_url = urllib.parse.quote(o._replace(netloc="{}:{}".format(o.hostname, o.port)).geturl()) | ||||
|             browserless_function_url = f"{browserless_function_url}&--proxy-server={proxy_url}" | ||||
|  | ||||
|         try: | ||||
|             amp = '&' if '?' in browserless_function_url else '?' | ||||
|             response = requests.request( | ||||
|                 method="POST", | ||||
|                 json={ | ||||
|                     "code": code, | ||||
|                     "context": { | ||||
|                         # Very primitive disk cache - USE WITH EXTREME CAUTION | ||||
|                         # Run browserless container  with -e "FUNCTION_BUILT_INS=[\"fs\",\"crypto\"]" | ||||
|                         'disk_cache_dir': os.getenv("PUPPETEER_DISK_CACHE", False), # or path to disk cache ending in /, ie /tmp/cache/ | ||||
|                         'execute_js': self.webdriver_js_execute_code, | ||||
|                         'extra_wait_ms': extra_wait_ms, | ||||
|                         'include_filters': current_include_filters, | ||||
|                         'req_headers': request_headers, | ||||
|                         'screenshot_quality': int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)), | ||||
|                         'url': url, | ||||
|                         'user_agent': {k.lower(): v for k, v in request_headers.items()}.get('user-agent', None), | ||||
|                         'proxy_username': self.proxy.get('username', '') if self.proxy else False, | ||||
|                         'proxy_password': self.proxy.get('password', '') if self.proxy and self.proxy.get('username') else False, | ||||
|                         'no_cache_list': [ | ||||
|                             'twitter', | ||||
|                             '.pdf' | ||||
|                         ], | ||||
|                         # Could use https://github.com/easylist/easylist here, or install a plugin | ||||
|                         'block_url_list': [ | ||||
|                             'adnxs.com', | ||||
|                             'analytics.twitter.com', | ||||
|                             'doubleclick.net', | ||||
|                             'google-analytics.com', | ||||
|                             'googletagmanager', | ||||
|                             'trustpilot.com' | ||||
|                         ] | ||||
|                     } | ||||
|                 }, | ||||
|                 # @todo /function needs adding ws:// to http:// rebuild this | ||||
|                 url=browserless_function_url+f"{amp}--disable-features=AudioServiceOutOfProcess&dumpio=true&--disable-remote-fonts", | ||||
|                 timeout=wait_browserless_seconds) | ||||
|  | ||||
|         except ReadTimeout: | ||||
|             raise PageUnloadable(url=url, status_code=None, message=f"No response from browserless in {wait_browserless_seconds}s") | ||||
|         except ConnectTimeout: | ||||
|             raise PageUnloadable(url=url, status_code=None, message=f"Timed out connecting to browserless, retrying..") | ||||
|         else: | ||||
|             # 200 Here means that the communication to browserless worked only, not the page state | ||||
|             try: | ||||
|                 x = response.json() | ||||
|             except Exception as e: | ||||
|                 raise PageUnloadable(url=url, message="Error reading JSON response from browserless") | ||||
|  | ||||
|             try: | ||||
|                 self.status_code = response.status_code | ||||
|             except Exception as e: | ||||
|                 raise PageUnloadable(url=url, message="Error reading status_code code response from browserless") | ||||
|  | ||||
|             self.headers = x.get('headers') | ||||
|  | ||||
|             if self.status_code != 200 and not ignore_status_codes: | ||||
|                 raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, page_html=x.get('content','')) | ||||
|  | ||||
|             if self.status_code == 200: | ||||
|                 import base64 | ||||
|  | ||||
|                 if not x.get('screenshot'): | ||||
|                     # https://github.com/puppeteer/puppeteer/blob/v1.0.0/docs/troubleshooting.md#tips | ||||
|                     # https://github.com/puppeteer/puppeteer/issues/1834 | ||||
|                     # https://github.com/puppeteer/puppeteer/issues/1834#issuecomment-381047051 | ||||
|                     # Check your memory is shared and big enough | ||||
|                     raise ScreenshotUnavailable(url=url, status_code=None) | ||||
|  | ||||
|                 if not x.get('content', '').strip(): | ||||
|                     raise EmptyReply(url=url, status_code=None) | ||||
|  | ||||
|                 self.content = x.get('content') | ||||
|                 self.instock_data = x.get('instock_data') | ||||
|                 self.screenshot = base64.b64decode(x.get('screenshot')) | ||||
|                 self.xpath_data = x.get('xpath_data') | ||||
|             else: | ||||
|                 # Some other error from browserless | ||||
|                 raise PageUnloadable(url=url, status_code=None, message=response.content.decode('utf-8')) | ||||
|  | ||||
|     def run(self, | ||||
|             url, | ||||
|             timeout, | ||||
| @@ -441,21 +322,6 @@ class base_html_playwright(Fetcher): | ||||
|             is_binary=False): | ||||
|  | ||||
|  | ||||
|         # For now, USE_EXPERIMENTAL_PUPPETEER_FETCH is not supported by watches with BrowserSteps (for now!) | ||||
|         # browser_connection_is_custom doesnt work with puppeteer style fetch (use playwright native too in this case) | ||||
|         if not self.browser_connection_is_custom and not self.browser_steps and os.getenv('USE_EXPERIMENTAL_PUPPETEER_FETCH'): | ||||
|             if strtobool(os.getenv('USE_EXPERIMENTAL_PUPPETEER_FETCH')): | ||||
|                 # Temporary backup solution until we rewrite the playwright code | ||||
|                 return self.run_fetch_browserless_puppeteer( | ||||
|                     url, | ||||
|                     timeout, | ||||
|                     request_headers, | ||||
|                     request_body, | ||||
|                     request_method, | ||||
|                     ignore_status_codes, | ||||
|                     current_include_filters, | ||||
|                     is_binary) | ||||
|  | ||||
|         from playwright.sync_api import sync_playwright | ||||
|         import playwright._impl._errors | ||||
|  | ||||
| @@ -528,7 +394,7 @@ class base_html_playwright(Fetcher): | ||||
|                 self.status_code = response.status | ||||
|             except Exception as e: | ||||
|                 # https://github.com/dgtlmoon/changedetection.io/discussions/2122#discussioncomment-8241962 | ||||
|                 logger.critical(f"Response from browserless/playwright did not have a status_code! Response follows.") | ||||
|                 logger.critical(f"Response from the browser/Playwright did not have a status_code! Response follows.") | ||||
|                 logger.critical(response) | ||||
|                 raise PageUnloadable(url=url, status_code=None, message=str(e)) | ||||
|  | ||||
|   | ||||
| @@ -57,7 +57,7 @@ class import_url_list(Importer): | ||||
|  | ||||
|             # Flask wtform validators wont work with basic auth, use validators package | ||||
|             # Up to 5000 per batch so we dont flood the server | ||||
|             # @todo validators.url failed on local hostnames (such as referring to ourself when using browserless) | ||||
|             # @todo validators.url will fail when you add your own IP etc | ||||
|             if len(url) and 'http' in url.lower() and good < 5000: | ||||
|                 extras = None | ||||
|                 if processor: | ||||
|   | ||||
| @@ -146,7 +146,7 @@ module.exports = async ({page, context}) => { | ||||
|     var xpath_data; | ||||
|     var instock_data; | ||||
|     try { | ||||
|         // Not sure the best way here, in the future this should be a new package added to npm then run in browserless | ||||
|         // Not sure the best way here, in the future this should be a new package added to npm then run in evaluatedCode | ||||
|         // (Once the old playwright is removed) | ||||
|         xpath_data = await page.evaluate((include_filters) => {%xpath_scrape_code%}, include_filters); | ||||
|         instock_data = await page.evaluate(() => {%instock_scrape_code%}); | ||||
|   | ||||
| @@ -6,16 +6,16 @@ | ||||
| set -x | ||||
|  | ||||
| # An extra browser is configured, but we never chose to use it, so it should NOT show in the logs | ||||
| docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest tests/custom_browser_url/test_custom_browser_url.py::test_request_not_via_custom_browser_url' | ||||
| docker logs browserless-custom-url &>log.txt | ||||
| docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest tests/custom_browser_url/test_custom_browser_url.py::test_request_not_via_custom_browser_url' | ||||
| docker logs sockpuppetbrowser-custom-url &>log.txt | ||||
| grep 'custom-browser-search-string=1' log.txt | ||||
| if [ $? -ne 1 ] | ||||
| then | ||||
|   echo "Saw a request in 'browserless-custom-url' container with 'custom-browser-search-string=1' when I should not" | ||||
|   echo "Saw a request in 'sockpuppetbrowser-custom-url' container with 'custom-browser-search-string=1' when I should not" | ||||
|   exit 1 | ||||
| fi | ||||
|  | ||||
| docker logs browserless &>log.txt | ||||
| docker logs sockpuppetbrowser &>log.txt | ||||
| grep 'custom-browser-search-string=1' log.txt | ||||
| if [ $? -ne 1 ] | ||||
| then | ||||
| @@ -24,16 +24,16 @@ then | ||||
| fi | ||||
|  | ||||
| # Special connect string should appear in the custom-url container, but not in the 'default' one | ||||
| docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest tests/custom_browser_url/test_custom_browser_url.py::test_request_via_custom_browser_url' | ||||
| docker logs browserless-custom-url &>log.txt | ||||
| docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest tests/custom_browser_url/test_custom_browser_url.py::test_request_via_custom_browser_url' | ||||
| docker logs sockpuppetbrowser-custom-url &>log.txt | ||||
| grep 'custom-browser-search-string=1' log.txt | ||||
| if [ $? -ne 0 ] | ||||
| then | ||||
|   echo "Did not see request in 'browserless-custom-url' container with 'custom-browser-search-string=1' when I should" | ||||
|   echo "Did not see request in 'sockpuppetbrowser-custom-url' container with 'custom-browser-search-string=1' when I should" | ||||
|   exit 1 | ||||
| fi | ||||
|  | ||||
| docker logs browserless &>log.txt | ||||
| docker logs sockpuppetbrowser &>log.txt | ||||
| grep 'custom-browser-search-string=1' log.txt | ||||
| if [ $? -ne 1 ] | ||||
| then | ||||
|   | ||||
| @@ -35,7 +35,7 @@ docker run --network changedet-network \ | ||||
| docker run --network changedet-network \ | ||||
|   -e "SOCKSTEST=manual-playwright" \ | ||||
|   -v `pwd`/tests/proxy_socks5/proxies.json-example-noauth:/app/changedetectionio/test-datastore/proxies.json \ | ||||
|   -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" \ | ||||
|   -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" \ | ||||
|   --rm \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && pytest tests/proxy_socks5/test_socks5_proxy_sources.py' | ||||
|   | ||||
| @@ -10,7 +10,7 @@ $(document).ready(function () { | ||||
|         } | ||||
|     }) | ||||
|     var browsersteps_session_id; | ||||
|     var browserless_seconds_remaining = 0; | ||||
|     var browser_interface_seconds_remaining = 0; | ||||
|     var apply_buttons_disabled = false; | ||||
|     var include_text_elements = $("#include_text_elements"); | ||||
|     var xpath_data = false; | ||||
| @@ -49,7 +49,7 @@ $(document).ready(function () { | ||||
|         $('#browsersteps-img').removeAttr('src'); | ||||
|         $("#browsersteps-click-start").show(); | ||||
|         $("#browsersteps-selector-wrapper .spinner").hide(); | ||||
|         browserless_seconds_remaining = 0; | ||||
|         browser_interface_seconds_remaining = 0; | ||||
|         browsersteps_session_id = false; | ||||
|         apply_buttons_disabled = false; | ||||
|         ctx.clearRect(0, 0, c.width, c.height); | ||||
| @@ -61,12 +61,12 @@ $(document).ready(function () { | ||||
|         $('#browser_steps >li:first-child').css('opacity', '0.5'); | ||||
|     } | ||||
|  | ||||
|     // Show seconds remaining until playwright/browserless needs to restart the session | ||||
|     // Show seconds remaining until the browser interface needs to restart the session | ||||
|     // (See comment at the top of changedetectionio/blueprint/browser_steps/__init__.py ) | ||||
|     setInterval(() => { | ||||
|         if (browserless_seconds_remaining >= 1) { | ||||
|             document.getElementById('browserless-seconds-remaining').innerText = browserless_seconds_remaining + " seconds remaining in session"; | ||||
|             browserless_seconds_remaining -= 1; | ||||
|         if (browser_interface_seconds_remaining >= 1) { | ||||
|             document.getElementById('browser-seconds-remaining').innerText = browser_interface_seconds_remaining + " seconds remaining in session"; | ||||
|             browser_interface_seconds_remaining -= 1; | ||||
|         } | ||||
|     }, "1000") | ||||
|  | ||||
| @@ -261,7 +261,7 @@ $(document).ready(function () { | ||||
|             // This should trigger 'Goto site' | ||||
|             console.log("Got startup response, requesting Goto-Site (first) step fake click"); | ||||
|             $('#browser_steps >li:first-child .apply').click(); | ||||
|             browserless_seconds_remaining = 500; | ||||
|             browser_interface_seconds_remaining = 500; | ||||
|             set_first_gotosite_disabled(); | ||||
|         }).fail(function (data) { | ||||
|             console.log(data); | ||||
|   | ||||
| @@ -228,7 +228,7 @@ User-Agent: wonderbra 1.0") }} | ||||
|                                 </div> | ||||
|                             </div> | ||||
|                             <div id="browser-steps-fieldlist" style="padding-left: 1em;  width: 350px; font-size: 80%;" > | ||||
|                                 <span id="browserless-seconds-remaining">Loading</span> <span style="font-size: 80%;"> (<a target=_new href="https://github.com/dgtlmoon/changedetection.io/pull/478/files#diff-1a79d924d1840c485238e66772391268a89c95b781d69091384cf1ea1ac146c9R4">?</a>) </span> | ||||
|                                 <span id="browser-seconds-remaining">Loading</span> <span style="font-size: 80%;"> (<a target=_new href="https://github.com/dgtlmoon/changedetection.io/pull/478/files#diff-1a79d924d1840c485238e66772391268a89c95b781d69091384cf1ea1ac146c9R4">?</a>) </span> | ||||
|                                 {{ render_field(form.browser_steps) }} | ||||
|                             </div> | ||||
|                         </div> | ||||
|   | ||||
| @@ -7,10 +7,11 @@ from ..util import live_server_setup, wait_for_all_checks | ||||
| def do_test(client, live_server, make_test_use_extra_browser=False): | ||||
|  | ||||
|     # Grep for this string in the logs? | ||||
|     test_url = f"https://changedetection.io/ci-test.html" | ||||
|     test_url = f"https://changedetection.io/ci-test.html?non-custom-default=true" | ||||
|     # "non-custom-default" should not appear in the custom browser connection | ||||
|     custom_browser_name = 'custom browser URL' | ||||
|  | ||||
|     # needs to be set and something like 'ws://127.0.0.1:3000?stealth=1&--disable-web-security=true' | ||||
|     # needs to be set and something like 'ws://127.0.0.1:3000' | ||||
|     assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test" | ||||
|  | ||||
|     ##################### | ||||
| @@ -19,9 +20,7 @@ def do_test(client, live_server, make_test_use_extra_browser=False): | ||||
|         data={"application-empty_pages_are_a_change": "", | ||||
|               "requests-time_between_check-minutes": 180, | ||||
|               'application-fetch_backend': "html_webdriver", | ||||
|               # browserless-custom-url is setup in  .github/workflows/test-only.yml | ||||
|               # the test script run_custom_browser_url_test.sh will look for 'custom-browser-search-string' in the container logs | ||||
|               'requests-extra_browsers-0-browser_connection_url': 'ws://browserless-custom-url:3000?stealth=1&--disable-web-security=true&custom-browser-search-string=1', | ||||
|               'requests-extra_browsers-0-browser_connection_url': 'ws://sockpuppetbrowser-custom-url:3000', | ||||
|               'requests-extra_browsers-0-browser_name': custom_browser_name | ||||
|               }, | ||||
|         follow_redirects=True | ||||
| @@ -51,7 +50,8 @@ def do_test(client, live_server, make_test_use_extra_browser=False): | ||||
|         res = client.post( | ||||
|             url_for("edit_page", uuid="first"), | ||||
|             data={ | ||||
|                   "url": test_url, | ||||
|                 # 'run_custom_browser_url_tests.sh' will search for this string to know if we hit the right browser container or not | ||||
|                   "url": f"https://changedetection.io/ci-test.html?custom-browser-search-string=1", | ||||
|                   "tags": "", | ||||
|                   "headers": "", | ||||
|                   'fetch_backend': f"extra_browser_{custom_browser_name}", | ||||
|   | ||||
| @@ -456,7 +456,7 @@ def test_ignore_json_order(client, live_server): | ||||
|  | ||||
| def test_correct_header_detect(client, live_server): | ||||
|     # Like in https://github.com/dgtlmoon/changedetection.io/pull/1593 | ||||
|     # Specify extra html that JSON is sometimes wrapped in - when using Browserless/Puppeteer etc | ||||
|     # Specify extra html that JSON is sometimes wrapped in - when using SockpuppetBrowser / Puppeteer / Playwright etc | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write('<html><body>{"hello" : 123, "world": 123}') | ||||
|  | ||||
|   | ||||
| @@ -14,7 +14,7 @@ def test_headers_in_request(client, live_server): | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_headers', _external=True) | ||||
|     if os.getenv('PLAYWRIGHT_DRIVER_URL'): | ||||
|         # Because its no longer calling back to localhost but from browserless, set in test-only.yml | ||||
|         # Because it's no longer calling back to localhost but from the browser container, set in test-only.yml | ||||
|         test_url = test_url.replace('localhost', 'changedet') | ||||
|  | ||||
|     # Add the test URL twice, we will check | ||||
| @@ -89,7 +89,7 @@ def test_body_in_request(client, live_server): | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_body', _external=True) | ||||
|     if os.getenv('PLAYWRIGHT_DRIVER_URL'): | ||||
|         # Because its no longer calling back to localhost but from browserless, set in test-only.yml | ||||
|         # Because it's no longer calling back to localhost but is called from the browser container, set in test-only.yml | ||||
|         test_url = test_url.replace('localhost', 'cdio') | ||||
|  | ||||
|     res = client.post( | ||||
| @@ -181,7 +181,7 @@ def test_method_in_request(client, live_server): | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_method', _external=True) | ||||
|     if os.getenv('PLAYWRIGHT_DRIVER_URL'): | ||||
|         # Because its no longer calling back to localhost but from browserless, set in test-only.yml | ||||
|         # Because it's no longer calling back to localhost but is called from the browser container, set in test-only.yml | ||||
|         test_url = test_url.replace('localhost', 'cdio') | ||||
|  | ||||
|     # Add the test URL twice, we will check | ||||
| @@ -258,7 +258,7 @@ def test_headers_textfile_in_request(client, live_server): | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_headers', _external=True) | ||||
|     if os.getenv('PLAYWRIGHT_DRIVER_URL'): | ||||
|         # Because its no longer calling back to localhost but from browserless, set in test-only.yml | ||||
|         # Because it's no longer calling back to localhost but is called from the browser container, set in test-only.yml | ||||
|         test_url = test_url.replace('localhost', 'cdio') | ||||
|  | ||||
|     print ("TEST URL IS ",test_url) | ||||
|   | ||||
| @@ -30,7 +30,7 @@ services: | ||||
|   #             https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy | ||||
|   # | ||||
|   #       Alternative Playwright URL, do not use "'s or 's! | ||||
|   #      - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000/?stealth=1&--disable-web-security=true | ||||
|   #      - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000 | ||||
|   # | ||||
|   #       Playwright proxy settings playwright_proxy_server, playwright_proxy_bypass, playwright_proxy_username, playwright_proxy_password | ||||
|   # | ||||
| @@ -71,32 +71,23 @@ services: | ||||
| #            condition: service_started | ||||
|  | ||||
|      # Used for fetching pages via Playwright+Chrome where you need Javascript support. | ||||
|      # Note: Playwright/browserless not supported on ARM type devices (rPi etc) | ||||
|      # RECOMMENDED FOR FETCHING PAGES WITH CHROME | ||||
| #    playwright-chrome: | ||||
| #        hostname: playwright-chrome | ||||
| #        image: browserless/chrome:1.60-chrome-stable | ||||
| #        image: dgtlmoon/sockpuppetbrowser:latest | ||||
| #        cap_add: | ||||
| #            - SYS_ADMIN | ||||
| ## SYS_ADMIN might be too much, but it can be needed on your platform https://github.com/puppeteer/puppeteer/blob/main/docs/troubleshooting.md#running-puppeteer-on-gitlabci | ||||
| #        restart: unless-stopped | ||||
| #        environment: | ||||
| #            - SCREEN_WIDTH=1920 | ||||
| #            - SCREEN_HEIGHT=1024 | ||||
| #            - SCREEN_DEPTH=16 | ||||
| #            - ENABLE_DEBUGGER=false | ||||
| #            - PREBOOT_CHROME=true | ||||
| #            - CONNECTION_TIMEOUT=300000 | ||||
| #            - MAX_CONCURRENT_SESSIONS=10 | ||||
| #            - CHROME_REFRESH_TIME=600000 | ||||
| #            - DEFAULT_BLOCK_ADS=true | ||||
| #            - DEFAULT_STEALTH=true | ||||
| # | ||||
| #             Ignore HTTPS errors, like for self-signed certs | ||||
| #            - DEFAULT_IGNORE_HTTPS_ERRORS=true | ||||
| # | ||||
| #            - MAX_CONCURRENT_CHROME_PROCESSES=10 | ||||
|  | ||||
|      # Used for fetching pages via Playwright+Chrome where you need Javascript support. | ||||
|      # Note: Works well but is deprecated, does not fetch full page screenshots (doesn't work with Visual Selector) | ||||
|      #       Does not report status codes (200, 404, 403) and other issues | ||||
|      # More information about the advantages of playwright/browserless https://www.browserless.io/blog/2023/12/13/migrating-selenium-to-playwright/ | ||||
| #    browser-chrome: | ||||
| #        hostname: browser-chrome | ||||
| #        image: selenium/standalone-chrome:4 | ||||
|   | ||||
		Reference in New Issue
	
	Block a user