mirror of
				https://github.com/dgtlmoon/changedetection.io.git
				synced 2025-10-31 14:47:21 +00:00 
			
		
		
		
	Compare commits
	
		
			12 Commits
		
	
	
		
			skip-empty
			...
			browserste
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | 5d1ecaed94 | ||
|   | 4b49759113 | ||
|   | 1945a59a72 | ||
|   | e9a9790cb0 | ||
|   | 520650e2e6 | ||
|   | 593660e2f6 | ||
|   | 7d96b4ba83 | ||
|   | fca40e4d5b | ||
|   | 66e2dfcead | ||
|   | bce7eb68fb | ||
|   | 93c0385119 | ||
|   | e17f3be739 | 
							
								
								
									
										6
									
								
								.github/workflows/pypi-release.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								.github/workflows/pypi-release.yml
									
									
									
									
										vendored
									
									
								
							| @@ -40,12 +40,12 @@ jobs: | ||||
|         path: dist/ | ||||
|     - name: Test that the basic pip built package runs without error | ||||
|       run: | | ||||
|         set -e | ||||
|         set -ex | ||||
|         pip3 install dist/changedetection.io*.whl | ||||
|         changedetection.io -d /tmp -p 10000 & | ||||
|         sleep 3 | ||||
|         curl http://127.0.0.1:10000/static/styles/pure-min.css >/dev/null | ||||
|         curl http://127.0.0.1:10000/ >/dev/null | ||||
|         curl --retry-connrefused --retry 6 http://127.0.0.1:10000/static/styles/pure-min.css >/dev/null | ||||
|         curl --retry-connrefused --retry 6 http://127.0.0.1:10000/ >/dev/null | ||||
|         killall changedetection.io | ||||
|  | ||||
|  | ||||
|   | ||||
							
								
								
									
										28
									
								
								.github/workflows/test-only.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										28
									
								
								.github/workflows/test-only.yml
									
									
									
									
										vendored
									
									
								
							| @@ -27,7 +27,7 @@ jobs: | ||||
|         run: | | ||||
|            | ||||
|           docker network create changedet-network | ||||
|  | ||||
|            | ||||
|           # Selenium+browserless | ||||
|           docker run --network changedet-network -d --hostname selenium  -p 4444:4444 --rm --shm-size="2g"  selenium/standalone-chrome:4 | ||||
|           docker run --network changedet-network -d --name browserless --hostname browserless -e "FUNCTION_BUILT_INS=[\"fs\",\"crypto\"]" -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm  -p 3000:3000  --shm-size="2g"  browserless/chrome:1.60-chrome-stable | ||||
| @@ -47,7 +47,7 @@ jobs: | ||||
|           # Debug SMTP server/echo message back server | ||||
|           docker run --network changedet-network -d -p 11025:11025 -p 11080:11080  --hostname mailserver test-changedetectionio  bash -c 'python changedetectionio/tests/smtp/smtp-test-server.py'  | ||||
|  | ||||
|       - name: Test built container with pytest | ||||
|       - name: Test built container with Pytest (generally as requests/plaintext fetching) | ||||
|         run: | | ||||
|           # Unit tests | ||||
|           echo "run test with unittest" | ||||
| @@ -61,20 +61,32 @@ jobs: | ||||
|           # append the docker option. e.g. '-e LOGGER_LEVEL=DEBUG' | ||||
|           docker run --network changedet-network  test-changedetectionio  bash -c 'cd changedetectionio && ./run_basic_tests.sh' | ||||
|  | ||||
|       - name: Test built container selenium+browserless/playwright | ||||
|       - name: Specific tests in built container for Selenium | ||||
|         run: | | ||||
|            | ||||
|           # Selenium fetch | ||||
|           docker run --rm -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py' | ||||
|            | ||||
|  | ||||
|       - name: Specific tests in built container for Playwright | ||||
|         run: |          | ||||
|           # Playwright/Browserless fetch | ||||
|           docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py && pytest tests/visualselector/test_fetch_data.py' | ||||
|            | ||||
|  | ||||
|       - name: Specific tests in built container for headers and requests checks with Playwright | ||||
|         run: |                   | ||||
|           # Settings headers playwright tests - Call back in from Browserless, check headers | ||||
|           docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000?dumpio=true" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0  --live-server-port=5004 tests/test_request.py' | ||||
|  | ||||
|       - name: Specific tests in built container for headers and requests checks with Selenium | ||||
|         run: |                   | ||||
|           docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0  --live-server-port=5004 tests/test_request.py' | ||||
|  | ||||
|       - name: Specific tests in built container with Playwright as Puppeteer experimental fetcher | ||||
|         run: |                   | ||||
|           docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "USE_EXPERIMENTAL_PUPPETEER_FETCH=yes" -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000?dumpio=true" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0  --live-server-port=5004 tests/test_request.py'           | ||||
|            | ||||
|  | ||||
|       - name: Test built container restock detection via Playwright | ||||
|         run: |                             | ||||
|           # restock detection via playwright - added name=changedet here so that playwright/browserless can connect to it | ||||
|           docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py' | ||||
|  | ||||
| @@ -106,10 +118,10 @@ jobs: | ||||
|           docker run --name test-changedetectionio -p 5556:5000  -d test-changedetectionio | ||||
|           sleep 3 | ||||
|           # Should return 0 (no error) when grep finds it | ||||
|           curl -s http://localhost:5556 |grep -q checkbox-uuid | ||||
|           curl --retry-connrefused --retry 6  -s http://localhost:5556 |grep -q checkbox-uuid | ||||
|            | ||||
|           # and IPv6 | ||||
|           curl -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid | ||||
|           curl --retry-connrefused --retry 6  -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid | ||||
|  | ||||
|           # Check whether TRACE log is enabled. | ||||
|           # Also, check whether the TRACE output came from STDERR | ||||
|   | ||||
| @@ -18,8 +18,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue | ||||
|     def accept(uuid): | ||||
|         datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT | ||||
|         update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False})) | ||||
|         return redirect(url_for("form_watch_checknow", uuid=uuid)) | ||||
|  | ||||
|         return redirect(url_for("index")) | ||||
|  | ||||
|     @login_required | ||||
|     @price_data_follower_blueprint.route("/<string:uuid>/reject", methods=['GET']) | ||||
|   | ||||
| @@ -51,6 +51,7 @@ class BrowserStepsStepException(Exception): | ||||
|         return | ||||
|  | ||||
|  | ||||
| # @todo - make base Exception class that announces via logger() | ||||
| class PageUnloadable(Exception): | ||||
|     def __init__(self, status_code, url, message, screenshot=False): | ||||
|         # Set this so we can use it in other parts of the app | ||||
| @@ -60,6 +61,10 @@ class PageUnloadable(Exception): | ||||
|         self.message = message | ||||
|         return | ||||
|  | ||||
| class BrowserStepsInUnsupportedFetcher(Exception): | ||||
|     def __init__(self, url): | ||||
|         self.url = url | ||||
|         return | ||||
|  | ||||
| class EmptyReply(Exception): | ||||
|     def __init__(self, status_code, url, screenshot=None): | ||||
| @@ -389,10 +394,24 @@ class base_html_playwright(Fetcher): | ||||
|             raise PageUnloadable(url=url, status_code=None, message=f"Timed out connecting to browserless, retrying..") | ||||
|         else: | ||||
|             # A 200 here only means that the communication with browserless worked; it says nothing about the page state | ||||
|             if response.status_code == 200: | ||||
|             try: | ||||
|                 x = response.json() | ||||
|             except Exception as e: | ||||
|                 raise PageUnloadable(url=url, message="Error reading JSON response from browserless") | ||||
|  | ||||
|             try: | ||||
|                 self.status_code = response.status_code | ||||
|             except Exception as e: | ||||
|                 raise PageUnloadable(url=url, message="Error reading status_code code response from browserless") | ||||
|  | ||||
|             self.headers = x.get('headers') | ||||
|  | ||||
|             if self.status_code != 200 and not ignore_status_codes: | ||||
|                 raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, page_html=x.get('content','')) | ||||
|  | ||||
|             if self.status_code == 200: | ||||
|                 import base64 | ||||
|  | ||||
|                 x = response.json() | ||||
|                 if not x.get('screenshot'): | ||||
|                     # https://github.com/puppeteer/puppeteer/blob/v1.0.0/docs/troubleshooting.md#tips | ||||
|                     # https://github.com/puppeteer/puppeteer/issues/1834 | ||||
| @@ -403,16 +422,10 @@ class base_html_playwright(Fetcher): | ||||
|                 if not x.get('content', '').strip(): | ||||
|                     raise EmptyReply(url=url, status_code=None) | ||||
|  | ||||
|                 if x.get('status_code', 200) != 200 and not ignore_status_codes: | ||||
|                     raise Non200ErrorCodeReceived(url=url, status_code=x.get('status_code', 200), page_html=x['content']) | ||||
|  | ||||
|                 self.content = x.get('content') | ||||
|                 self.headers = x.get('headers') | ||||
|                 self.instock_data = x.get('instock_data') | ||||
|                 self.screenshot = base64.b64decode(x.get('screenshot')) | ||||
|                 self.status_code = x.get('status_code') | ||||
|                 self.xpath_data = x.get('xpath_data') | ||||
|  | ||||
|             else: | ||||
|                 # Some other error from browserless | ||||
|                 raise PageUnloadable(url=url, status_code=None, message=response.content.decode('utf-8')) | ||||
| @@ -703,6 +716,9 @@ class html_requests(Fetcher): | ||||
|             current_include_filters=None, | ||||
|             is_binary=False): | ||||
|  | ||||
|         if self.browser_steps_get_valid_steps(): | ||||
|             raise BrowserStepsInUnsupportedFetcher(url=url) | ||||
|  | ||||
|         # Make requests use a more modern looking user-agent | ||||
|         if not {k.lower(): v for k, v in request_headers.items()}.get('user-agent', None): | ||||
|             request_headers['User-Agent'] = os.getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT", | ||||
| @@ -742,6 +758,8 @@ class html_requests(Fetcher): | ||||
|                 if encoding: | ||||
|                     r.encoding = encoding | ||||
|  | ||||
|         self.headers = r.headers | ||||
|  | ||||
|         if not r.content or not len(r.content): | ||||
|             raise EmptyReply(url=url, status_code=r.status_code) | ||||
|  | ||||
| @@ -758,7 +776,7 @@ class html_requests(Fetcher): | ||||
|         else: | ||||
|             self.content = r.text | ||||
|  | ||||
|         self.headers = r.headers | ||||
|  | ||||
|         self.raw_content = r.content | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -13,7 +13,6 @@ from threading import Event | ||||
| import datetime | ||||
| import flask_login | ||||
| from loguru import logger | ||||
| import sys | ||||
| import os | ||||
| import pytz | ||||
| import queue | ||||
| @@ -317,6 +316,9 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|  | ||||
|     @app.route("/rss", methods=['GET']) | ||||
|     def rss(): | ||||
|         from jinja2 import Environment, BaseLoader | ||||
|         jinja2_env = Environment(loader=BaseLoader) | ||||
|         now = time.time() | ||||
|         # Always requires token set | ||||
|         app_rss_token = datastore.data['settings']['application'].get('rss_access_token') | ||||
|         rss_url_token = request.args.get('token') | ||||
| @@ -380,8 +382,12 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|                                              include_equal=False, | ||||
|                                              line_feed_sep="<br>") | ||||
|  | ||||
|                 fe.content(content="<html><body><h4>{}</h4>{}</body></html>".format(watch_title, html_diff), | ||||
|                            type='CDATA') | ||||
|                 # @todo Make this configurable and also consider html-colored markup | ||||
|                 # @todo User could decide if <link> goes to the diff page, or to the watch link | ||||
|                 rss_template = "<html><body>\n<h4><a href=\"{{watch_url}}\">{{watch_title}}</a></h4>\n<p>{{html_diff}}</p>\n</body></html>\n" | ||||
|                 content = jinja2_env.from_string(rss_template).render(watch_title=watch_title, html_diff=html_diff, watch_url=watch.link) | ||||
|  | ||||
|                 fe.content(content=content, type='CDATA') | ||||
|  | ||||
|                 fe.guid(guid, permalink=False) | ||||
|                 dt = datetime.datetime.fromtimestamp(int(watch.newest_history_key)) | ||||
| @@ -390,6 +396,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|  | ||||
|         response = make_response(fg.rss_str()) | ||||
|         response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8') | ||||
|         logger.trace(f"RSS generated in {time.time() - now:.3f}s") | ||||
|         return response | ||||
|  | ||||
|     @app.route("/", methods=['GET']) | ||||
|   | ||||
| @@ -56,6 +56,7 @@ base_config = { | ||||
|     'previous_md5': False, | ||||
|     'previous_md5_before_filters': False,  # Used for skipping changedetection entirely | ||||
|     'proxy': None,  # Preferred proxy connection | ||||
|     'remote_server_reply': None, # From 'server' reply header | ||||
|     'subtractive_selectors': [], | ||||
|     'tag': '', # Old system of text name for a tag, to be removed | ||||
|     'tags': [], # list of UUIDs to App.Tags | ||||
|   | ||||
| @@ -90,5 +90,10 @@ $(document).ready(function () { | ||||
|         } | ||||
|     } | ||||
|  | ||||
|  | ||||
|     $('#diff-form').on('submit', function (e) { | ||||
|         if ($('select[name=from_version]').val() === $('select[name=to_version]').val()) { | ||||
|             e.preventDefault(); | ||||
|             alert('Error - You are trying to compare the same version.'); | ||||
|         } | ||||
|     }); | ||||
| }); | ||||
|   | ||||
| @@ -255,6 +255,7 @@ class ChangeDetectionStore: | ||||
|                 'last_viewed': 0, | ||||
|                 'previous_md5': False, | ||||
|                 'previous_md5_before_filters': False, | ||||
|                 'remote_server_reply': None, | ||||
|                 'track_ldjson_price_data': None, | ||||
|             }) | ||||
|  | ||||
|   | ||||
| @@ -13,7 +13,7 @@ | ||||
| <script src="{{url_for('static_content', group='js', filename='diff-overview.js')}}" defer></script> | ||||
|  | ||||
| <div id="settings"> | ||||
|     <form class="pure-form " action="" method="GET"> | ||||
|     <form class="pure-form " action="" method="GET" id="diff-form"> | ||||
|         <fieldset> | ||||
|             {% if versions|length >= 1 %} | ||||
|                 <strong>Compare</strong> | ||||
|   | ||||
| @@ -401,6 +401,7 @@ Unavailable") }} | ||||
|                                 <li>Use <code>//(?aiLmsux))</code> type flags (more <a href="https://docs.python.org/3/library/re.html#index-15">information here</a>)<br></li> | ||||
|                                 <li>Keyword example ‐ example <code>Out of stock</code></li> | ||||
|                                 <li>Use groups to extract just that text ‐ example <code>/reports.+?(\d+)/i</code> returns a list of years only</li> | ||||
|                                 <li>Example - match lines containing a keyword <code>/.*icecream.*/</code></li> | ||||
|                             </ul> | ||||
|                         </li> | ||||
|                         <li>One line per regular-expression/string match</li> | ||||
|   | ||||
| @@ -163,6 +163,7 @@ def test_api_simple(client, live_server): | ||||
|     # Loading the most recent snapshot should force viewed to become true | ||||
|     client.get(url_for("diff_history_page", uuid="first"), follow_redirects=True) | ||||
|  | ||||
|     time.sleep(3) | ||||
|     # Fetch the whole watch again, viewed should be true | ||||
|     res = client.get( | ||||
|         url_for("watch", uuid=watch_uuid), | ||||
|   | ||||
| @@ -10,7 +10,7 @@ def test_setup(live_server): | ||||
| # Hard to just add more live server URLs when one test is already running (I think) | ||||
| # So we add our test here (was in a different file) | ||||
| def test_headers_in_request(client, live_server): | ||||
|     #live_server_setup(live_server) | ||||
|     #ve_server_setup(live_server) | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_headers', _external=True) | ||||
|     if os.getenv('PLAYWRIGHT_DRIVER_URL'): | ||||
| @@ -70,16 +70,17 @@ def test_headers_in_request(client, live_server): | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Re #137 -  Examine the JSON index file, it should have only one set of headers entered | ||||
|     # Re #137 -  It should have only one set of headers entered | ||||
|     watches_with_headers = 0 | ||||
|     with open('test-datastore/url-watches.json') as f: | ||||
|         app_struct = json.load(f) | ||||
|         for uuid in app_struct['watching']: | ||||
|             if (len(app_struct['watching'][uuid]['headers'])): | ||||
|     for k, watch in client.application.config.get('DATASTORE').data.get('watching').items(): | ||||
|             if (len(watch['headers'])): | ||||
|                 watches_with_headers += 1 | ||||
|     assert watches_with_headers == 1 | ||||
|  | ||||
|     # 'server' http header was automatically recorded | ||||
|     for k, watch in client.application.config.get('DATASTORE').data.get('watching').items(): | ||||
|         assert 'custom' in watch.get('remote_server_reply') # added in util.py | ||||
|  | ||||
|     # Should be only one with headers set | ||||
|     assert watches_with_headers==1 | ||||
|     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|  | ||||
|   | ||||
| @@ -175,12 +175,16 @@ def live_server_setup(live_server): | ||||
|     @live_server.app.route('/test-headers') | ||||
|     def test_headers(): | ||||
|  | ||||
|         output= [] | ||||
|         output = [] | ||||
|  | ||||
|         for header in request.headers: | ||||
|              output.append("{}:{}".format(str(header[0]),str(header[1])   )) | ||||
|             output.append("{}:{}".format(str(header[0]), str(header[1]))) | ||||
|  | ||||
|         return "\n".join(output) | ||||
|         content = "\n".join(output) | ||||
|  | ||||
|         resp = make_response(content, 200) | ||||
|         resp.headers['server'] = 'custom' | ||||
|         return resp | ||||
|  | ||||
|     # Just return the body in the request | ||||
|     @live_server.app.route('/test-body', methods=['POST', 'GET']) | ||||
|   | ||||
| @@ -430,6 +430,12 @@ class update_worker(threading.Thread): | ||||
|                                                                            'last_check_status': e.status_code, | ||||
|                                                                            'has_ldjson_price_data': None}) | ||||
|                         process_changedetection_results = False | ||||
|                     except content_fetcher.BrowserStepsInUnsupportedFetcher as e: | ||||
|                         err_text = "This watch has Browser Steps configured and so it cannot run with the 'Basic fast Plaintext/HTTP Client', either remove the Browser Steps or select a Chrome fetcher." | ||||
|                         self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text}) | ||||
|                         process_changedetection_results = False | ||||
|                         logger.error(f"Exception (BrowserStepsInUnsupportedFetcher) reached processing watch UUID: {uuid}") | ||||
|  | ||||
|                     except UnableToExtractRestockData as e: | ||||
|                         # Usually when fetcher.instock_data returns empty | ||||
|                         logger.error(f"Exception (UnableToExtractRestockData) reached processing watch UUID: {uuid}") | ||||
| @@ -491,6 +497,16 @@ class update_worker(threading.Thread): | ||||
|                     if self.datastore.data['watching'].get(uuid): | ||||
|                         # Always record that we atleast tried | ||||
|                         count = self.datastore.data['watching'][uuid].get('check_count', 0) + 1 | ||||
|  | ||||
|                         # Record the 'server' header reply, can be used for actions in the future like cloudflare/akamai workarounds | ||||
|                         try: | ||||
|                             server_header = update_handler.fetcher.headers.get('server', '').strip().lower()[:255] | ||||
|                             self.datastore.update_watch(uuid=uuid, | ||||
|                                                         update_obj={'remote_server_reply': server_header} | ||||
|                                                         ) | ||||
|                         except Exception as e: | ||||
|                             pass | ||||
|  | ||||
|                         self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3), | ||||
|                                                                            'last_checked': round(time.time()), | ||||
|                                                                            'check_count': count | ||||
|   | ||||
| @@ -94,7 +94,8 @@ services: | ||||
| # | ||||
|  | ||||
|      # Used for fetching pages via Playwright+Chrome where you need Javascript support. | ||||
|      # Note: works well but is deprecated, does not fetch full page screenshots (doesnt work with Visual Selector) and other issues | ||||
|      # Note: Works well but is deprecated, does not fetch full page screenshots (doesn't work with Visual Selector) | ||||
|      #       Does not report status codes (200, 404, 403) and has other issues | ||||
|      # More information about the advantages of playwright/browserless https://www.browserless.io/blog/2023/12/13/migrating-selenium-to-playwright/ | ||||
| #    browser-chrome: | ||||
| #        hostname: browser-chrome | ||||
|   | ||||
		Reference in New Issue
	
	Block a user