mirror of
				https://github.com/dgtlmoon/changedetection.io.git
				synced 2025-10-31 06:37:41 +00:00 
			
		
		
		
	Compare commits
	
		
			25 Commits
		
	
	
		
			with-error
			...
			pip-securi
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | d2daa6f28b | ||
|   | 963869b40a | ||
|   | 2f91695293 | ||
|   | 162a77079c | ||
|   | 01b81f4dbc | ||
|   | e209d9fba0 | ||
|   | eb2bd1ec8c | ||
|   | 3b43da35ec | ||
|   | a0665e1f18 | ||
|   | 9ffe7e0eaf | ||
|   | 3e5671a3a2 | ||
|   | 32f490783a | ||
|   | 2819e05615 | ||
|   | cd1aca9ee3 | ||
|   | 6a589e14f3 | ||
|   | dbb76f3618 | ||
|   | 4ae27af511 | ||
|   | e1860549dc | ||
|   | 9765d56a23 | ||
|   | 349111eb35 | ||
|   | 71e50569a0 | ||
|   | c372942295 | ||
|   | 0aef5483d9 | ||
|   | c266c64b94 | ||
|   | 32e5498a9d | 
							
								
								
									
										4
									
								
								.github/workflows/test-only.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								.github/workflows/test-only.yml
									
									
									
									
										vendored
									
									
								
							| @@ -29,8 +29,8 @@ jobs: | ||||
|           docker network create changedet-network | ||||
|  | ||||
|           # Selenium+browserless | ||||
|           docker run --network changedet-network -d --hostname selenium  -p 4444:4444 --rm --shm-size="2g"  selenium/standalone-chrome-debug:3.141.59 | ||||
|           docker run --network changedet-network -d --hostname browserless -e "FUNCTION_BUILT_INS=[\"fs\",\"crypto\"]" -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm  -p 3000:3000  --shm-size="2g"  browserless/chrome:1.53-chrome-stable | ||||
|           docker run --network changedet-network -d --hostname selenium  -p 4444:4444 --rm --shm-size="2g"  selenium/standalone-chrome:4 | ||||
|           docker run --network changedet-network -d --hostname browserless -e "FUNCTION_BUILT_INS=[\"fs\",\"crypto\"]" -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm  -p 3000:3000  --shm-size="2g"  browserless/chrome:1.60-chrome-stable | ||||
|  | ||||
|       - name: Build changedetection.io container for testing | ||||
|         run: |          | ||||
|   | ||||
							
								
								
									
										10
									
								
								Dockerfile
									
									
									
									
									
								
							
							
						
						
									
										10
									
								
								Dockerfile
									
									
									
									
									
								
							| @@ -1,5 +1,5 @@ | ||||
| # pip dependencies install stage | ||||
| FROM python:3.11-slim-bullseye as builder | ||||
| FROM python:3.11-slim-bookworm as builder | ||||
|  | ||||
| # See `cryptography` pin comment in requirements.txt | ||||
| ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1 | ||||
| @@ -20,6 +20,11 @@ WORKDIR /install | ||||
|  | ||||
| COPY requirements.txt /requirements.txt | ||||
|  | ||||
| # Instructing pip to fetch wheels from piwheels.org on ARMv6 and ARMv7 machines | ||||
| RUN if [ "$(dpkg --print-architecture)" = "armhf" ] || [ "$(dpkg --print-architecture)" = "armel" ]; then \ | ||||
|       printf "[global]\nextra-index-url=https://www.piwheels.org/simple\n" > /etc/pip.conf; \ | ||||
|     fi; | ||||
|  | ||||
| RUN pip install --target=/dependencies -r /requirements.txt | ||||
|  | ||||
| # Playwright is an alternative to Selenium | ||||
| @@ -29,10 +34,9 @@ RUN pip install --target=/dependencies playwright~=1.27.1 \ | ||||
|     || echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled." | ||||
|  | ||||
| # Final image stage | ||||
| FROM python:3.11-slim-bullseye | ||||
| FROM python:3.11-slim-bookworm | ||||
|  | ||||
| RUN apt-get update && apt-get install -y --no-install-recommends \ | ||||
|     libssl1.1 \ | ||||
|     libxslt1.1 \ | ||||
|     # For pdftohtml | ||||
|     poppler-utils \ | ||||
|   | ||||
| @@ -38,7 +38,7 @@ from flask_paginate import Pagination, get_page_parameter | ||||
| from changedetectionio import html_tools | ||||
| from changedetectionio.api import api_v1 | ||||
|  | ||||
| __version__ = '0.45.3' | ||||
| __version__ = '0.45.5' | ||||
|  | ||||
| from changedetectionio.store import BASE_URL_NOT_SET_TEXT | ||||
|  | ||||
| @@ -416,11 +416,18 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|  | ||||
|         # Sort by last_changed and add the uuid which is usually the key.. | ||||
|         sorted_watches = [] | ||||
|         with_errors = request.args.get('with_errors') == "1" | ||||
|         errored_count = 0 | ||||
|         search_q = request.args.get('q').strip().lower() if request.args.get('q') else False | ||||
|         for uuid, watch in datastore.data['watching'].items(): | ||||
|             if with_errors and not watch.get('last_error'): | ||||
|                 continue | ||||
|  | ||||
|             if limit_tag and not limit_tag in watch['tags']: | ||||
|                     continue | ||||
|  | ||||
|             if watch.get('last_error'): | ||||
|                 errored_count += 1 | ||||
|                  | ||||
|             if search_q: | ||||
|                 if (watch.get('title') and search_q in watch.get('title').lower()) or search_q in watch.get('url', '').lower(): | ||||
|                     sorted_watches.append(watch) | ||||
| @@ -442,6 +449,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|                                  active_tag=limit_tag, | ||||
|                                  app_rss_token=datastore.data['settings']['application']['rss_access_token'], | ||||
|                                  datastore=datastore, | ||||
|                                  errored_count=errored_count, | ||||
|                                  form=form, | ||||
|                                  guid=datastore.data['app_guid'], | ||||
|                                  has_proxies=datastore.proxy_list, | ||||
| @@ -814,6 +822,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|         from . import forms | ||||
|  | ||||
|         if request.method == 'POST': | ||||
|  | ||||
|             from .importer import import_url_list, import_distill_io_json | ||||
|  | ||||
|             # URL List import | ||||
| @@ -837,11 +846,32 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|                 for uuid in d_importer.new_uuids: | ||||
|                     update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True})) | ||||
|  | ||||
|             # XLSX importer | ||||
|             if request.files and request.files.get('xlsx_file'): | ||||
|                 file = request.files['xlsx_file'] | ||||
|                 from .importer import import_xlsx_wachete, import_xlsx_custom | ||||
|  | ||||
|                 if request.values.get('file_mapping') == 'wachete': | ||||
|                     w_importer = import_xlsx_wachete() | ||||
|                     w_importer.run(data=file, flash=flash, datastore=datastore) | ||||
|                 else: | ||||
|                     w_importer = import_xlsx_custom() | ||||
|                     # Building mapping of col # to col # type | ||||
|                     map = {} | ||||
|                     for i in range(10): | ||||
|                         c = request.values.get(f"custom_xlsx[col_{i}]") | ||||
|                         v = request.values.get(f"custom_xlsx[col_type_{i}]") | ||||
|                         if c and v: | ||||
|                             map[int(c)] = v | ||||
|  | ||||
|                     w_importer.import_profile = map | ||||
|                     w_importer.run(data=file, flash=flash, datastore=datastore) | ||||
|  | ||||
|                 for uuid in w_importer.new_uuids: | ||||
|                     update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True})) | ||||
|  | ||||
|         form = forms.importForm(formdata=request.form if request.method == 'POST' else None, | ||||
| #                               data=default, | ||||
|                                ) | ||||
|         # Could be some remaining, or we could be on GET | ||||
|         form = forms.importForm(formdata=request.form if request.method == 'POST' else None) | ||||
|         output = render_template("import.html", | ||||
|                                  form=form, | ||||
|                                  import_url_list_remaining="\n".join(remaining_urls), | ||||
| @@ -855,7 +885,10 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|     def mark_all_viewed(): | ||||
|  | ||||
|         # Save the current newest history as the most recently viewed | ||||
|         with_errors = request.args.get('with_errors') == "1" | ||||
|         for watch_uuid, watch in datastore.data['watching'].items(): | ||||
|             if with_errors and not watch.get('last_error'): | ||||
|                 continue | ||||
|             datastore.set_last_viewed(watch_uuid, int(time.time())) | ||||
|  | ||||
|         return redirect(url_for('index')) | ||||
| @@ -1264,6 +1297,8 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|         # Forced recheck will skip the 'skip if content is the same' rule (, 'reprocess_existing_data': True}))) | ||||
|         tag = request.args.get('tag') | ||||
|         uuid = request.args.get('uuid') | ||||
|         with_errors = request.args.get('with_errors') == "1" | ||||
|  | ||||
|         i = 0 | ||||
|  | ||||
|         running_uuids = [] | ||||
| @@ -1279,6 +1314,8 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|             # Items that have this current tag | ||||
|             for watch_uuid, watch in datastore.data['watching'].items(): | ||||
|                 if tag in watch.get('tags', {}): | ||||
|                     if with_errors and not watch.get('last_error'): | ||||
|                         continue | ||||
|                     if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']: | ||||
|                         update_q.put( | ||||
|                             queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False}) | ||||
| @@ -1289,8 +1326,11 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|             # No tag, no uuid, add everything. | ||||
|             for watch_uuid, watch in datastore.data['watching'].items(): | ||||
|                 if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']: | ||||
|                     if with_errors and not watch.get('last_error'): | ||||
|                         continue | ||||
|                     update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False})) | ||||
|                     i += 1 | ||||
|  | ||||
|         flash("{} watches queued for rechecking.".format(i)) | ||||
|         return redirect(url_for('index', tag=tag)) | ||||
|  | ||||
|   | ||||
| @@ -77,13 +77,13 @@ class steppable_browser_interface(): | ||||
|     def action_goto_url(self, selector=None, value=None): | ||||
|         # self.page.set_viewport_size({"width": 1280, "height": 5000}) | ||||
|         now = time.time() | ||||
|         response = self.page.goto(value, timeout=0, wait_until='commit') | ||||
|  | ||||
|         # Wait_until = commit | ||||
|         # - `'commit'` - consider operation to be finished when network response is received and the document started loading. | ||||
|         # Better to not use any smarts from Playwright and just wait an arbitrary number of seconds | ||||
|         # This seemed to solve nearly all 'TimeoutErrors' | ||||
|         response = self.page.goto(value, timeout=0, wait_until='load') | ||||
|         # Should behave the same as the puppeteer_fetch.js methods, i.e. load with no timeout set (skip timeout) | ||||
|         # and also wait for a number of seconds? | ||||
|         #await page.waitForTimeout(1000); | ||||
|         #await page.waitForTimeout(extra_wait_ms); | ||||
|         print("Time to goto URL ", time.time() - now) | ||||
|         return response | ||||
|  | ||||
|     def action_click_element_containing_text(self, selector=None, value=''): | ||||
|         if not len(value.strip()): | ||||
| @@ -99,7 +99,8 @@ class steppable_browser_interface(): | ||||
|         self.page.fill(selector, value, timeout=10 * 1000) | ||||
|  | ||||
|     def action_execute_js(self, selector, value): | ||||
|         self.page.evaluate(value) | ||||
|         response = self.page.evaluate(value) | ||||
|         return response | ||||
|  | ||||
|     def action_click_element(self, selector, value): | ||||
|         print("Clicking element") | ||||
|   | ||||
| @@ -159,6 +159,16 @@ class Fetcher(): | ||||
|         """ | ||||
|         return {k.lower(): v for k, v in self.headers.items()} | ||||
|  | ||||
|     def browser_steps_get_valid_steps(self): | ||||
|         if self.browser_steps is not None and len(self.browser_steps): | ||||
|             valid_steps = filter( | ||||
|                 lambda s: (s['operation'] and len(s['operation']) and s['operation'] != 'Choose one' and s['operation'] != 'Goto site'), | ||||
|                 self.browser_steps) | ||||
|  | ||||
|             return valid_steps | ||||
|  | ||||
|         return None | ||||
|  | ||||
|     def iterate_browser_steps(self): | ||||
|         from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface | ||||
|         from playwright._impl._api_types import TimeoutError | ||||
| @@ -170,10 +180,7 @@ class Fetcher(): | ||||
|         if self.browser_steps is not None and len(self.browser_steps): | ||||
|             interface = steppable_browser_interface() | ||||
|             interface.page = self.page | ||||
|  | ||||
|             valid_steps = filter( | ||||
|                 lambda s: (s['operation'] and len(s['operation']) and s['operation'] != 'Choose one' and s['operation'] != 'Goto site'), | ||||
|                 self.browser_steps) | ||||
|             valid_steps = self.browser_steps_get_valid_steps() | ||||
|  | ||||
|             for step in valid_steps: | ||||
|                 step_n += 1 | ||||
| @@ -326,9 +333,8 @@ class base_html_playwright(Fetcher): | ||||
|             # Remove username/password if it exists in the URL or you will receive "ERR_NO_SUPPORTED_PROXIES" error | ||||
|             # Actual authentication handled by Puppeteer/node | ||||
|             o = urlparse(self.proxy.get('server')) | ||||
|             # Remove scheme, socks5:// doesnt always work and it will autodetect anyway | ||||
|             proxy_url = urllib.parse.quote(o._replace(netloc="{}:{}".format(o.hostname, o.port)).geturl().replace(f"{o.scheme}://", '', 1)) | ||||
|             browserless_function_url = f"{browserless_function_url}&--proxy-server={proxy_url}&dumpio=true" | ||||
|             proxy_url = urllib.parse.quote(o._replace(netloc="{}:{}".format(o.hostname, o.port)).geturl()) | ||||
|             browserless_function_url = f"{browserless_function_url}&--proxy-server={proxy_url}" | ||||
|  | ||||
|         try: | ||||
|             amp = '&' if '?' in browserless_function_url else '?' | ||||
| @@ -464,39 +470,26 @@ class base_html_playwright(Fetcher): | ||||
|             if len(request_headers): | ||||
|                 context.set_extra_http_headers(request_headers) | ||||
|  | ||||
|                 self.page.set_default_navigation_timeout(90000) | ||||
|                 self.page.set_default_timeout(90000) | ||||
|             # Listen for all console events and handle errors | ||||
|             self.page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}")) | ||||
|  | ||||
|                 # Listen for all console events and handle errors | ||||
|                 self.page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}")) | ||||
|             # Re-use as much code from browser steps as possible so it's the same | ||||
|             from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface | ||||
|             browsersteps_interface = steppable_browser_interface() | ||||
|             browsersteps_interface.page = self.page | ||||
|  | ||||
|             # Goto page | ||||
|             try: | ||||
|                 # Wait_until = commit | ||||
|                 # - `'commit'` - consider operation to be finished when network response is received and the document started loading. | ||||
|                 # Better to not use any smarts from Playwright and just wait an arbitrary number of seconds | ||||
|                 # This seemed to solve nearly all 'TimeoutErrors' | ||||
|                 response = self.page.goto(url, wait_until='commit') | ||||
|             except playwright._impl._api_types.Error as e: | ||||
|                 # Retry once - https://github.com/browserless/chrome/issues/2485 | ||||
|                 # Sometimes errors related to invalid cert's and other can be random | ||||
|                 print("Content Fetcher > retrying request got error - ", str(e)) | ||||
|                 time.sleep(1) | ||||
|                 response = self.page.goto(url, wait_until='commit') | ||||
|             except Exception as e: | ||||
|                 print("Content Fetcher > Other exception when page.goto", str(e)) | ||||
|             response = browsersteps_interface.action_goto_url(value=url) | ||||
|             self.headers = response.all_headers() | ||||
|  | ||||
|             if response is None: | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 raise PageUnloadable(url=url, status_code=None, message=str(e)) | ||||
|                 print("Content Fetcher > Response object was none") | ||||
|                 raise EmptyReply(url=url, status_code=None) | ||||
|  | ||||
|             # Execute any browser steps | ||||
|             try: | ||||
|                 extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay | ||||
|                 self.page.wait_for_timeout(extra_wait * 1000) | ||||
|  | ||||
|                 if self.webdriver_js_execute_code is not None and len(self.webdriver_js_execute_code): | ||||
|                     self.page.evaluate(self.webdriver_js_execute_code) | ||||
|  | ||||
|                     browsersteps_interface.action_execute_js(value=self.webdriver_js_execute_code, selector=None) | ||||
|             except playwright._impl._api_types.TimeoutError as e: | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
| @@ -508,28 +501,26 @@ class base_html_playwright(Fetcher): | ||||
|                 browser.close() | ||||
|                 raise PageUnloadable(url=url, status_code=None, message=str(e)) | ||||
|  | ||||
|             if response is None: | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 print("Content Fetcher > Response object was none") | ||||
|                 raise EmptyReply(url=url, status_code=None) | ||||
|  | ||||
|             # Run Browser Steps here | ||||
|             self.iterate_browser_steps() | ||||
|  | ||||
|             extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay | ||||
|             time.sleep(extra_wait) | ||||
|             self.page.wait_for_timeout(extra_wait * 1000) | ||||
|  | ||||
|  | ||||
|             self.content = self.page.content() | ||||
|             self.status_code = response.status | ||||
|  | ||||
|             if self.status_code != 200 and not ignore_status_codes: | ||||
|                 raise Non200ErrorCodeReceived(url=url, status_code=self.status_code) | ||||
|  | ||||
|             if len(self.page.content().strip()) == 0: | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 print("Content Fetcher > Content was empty") | ||||
|                 raise EmptyReply(url=url, status_code=response.status) | ||||
|  | ||||
|             self.status_code = response.status | ||||
|             self.headers = response.all_headers() | ||||
|             # Run Browser Steps here | ||||
|             if self.browser_steps_get_valid_steps(): | ||||
|                 self.iterate_browser_steps() | ||||
|                  | ||||
|             self.page.wait_for_timeout(extra_wait * 1000) | ||||
|  | ||||
|             # So we can find an element on the page where its selector was entered manually (maybe not xPath etc) | ||||
|             if current_include_filters is not None: | ||||
| @@ -541,6 +532,7 @@ class base_html_playwright(Fetcher): | ||||
|                 "async () => {" + self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) + "}") | ||||
|             self.instock_data = self.page.evaluate("async () => {" + self.instock_data_js + "}") | ||||
|  | ||||
|             self.content = self.page.content() | ||||
|             # Bug 3 in Playwright screenshot handling | ||||
|             # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it | ||||
|             # JPEG is better here because the screenshots can be very very large | ||||
| @@ -555,7 +547,7 @@ class base_html_playwright(Fetcher): | ||||
|             except Exception as e: | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 raise ScreenshotUnavailable(url=url, status_code=None) | ||||
|                 raise ScreenshotUnavailable(url=url, status_code=response.status_code) | ||||
|  | ||||
|             context.close() | ||||
|             browser.close() | ||||
| @@ -614,14 +606,17 @@ class base_html_webdriver(Fetcher): | ||||
|             is_binary=False): | ||||
|  | ||||
|         from selenium import webdriver | ||||
|         from selenium.webdriver.common.desired_capabilities import DesiredCapabilities | ||||
|         from selenium.webdriver.chrome.options import Options as ChromeOptions | ||||
|         from selenium.common.exceptions import WebDriverException | ||||
|         # request_body, request_method unused for now, until some magic in the future happens. | ||||
|  | ||||
|         options = ChromeOptions() | ||||
|         if self.proxy: | ||||
|             options.proxy = self.proxy | ||||
|  | ||||
|         self.driver = webdriver.Remote( | ||||
|             command_executor=self.command_executor, | ||||
|             desired_capabilities=DesiredCapabilities.CHROME, | ||||
|             proxy=self.proxy) | ||||
|             options=options) | ||||
|  | ||||
|         try: | ||||
|             self.driver.get(url) | ||||
| @@ -653,11 +648,11 @@ class base_html_webdriver(Fetcher): | ||||
|     # Does the connection to the webdriver work? run a test connection. | ||||
|     def is_ready(self): | ||||
|         from selenium import webdriver | ||||
|         from selenium.webdriver.common.desired_capabilities import DesiredCapabilities | ||||
|         from selenium.webdriver.chrome.options import Options as ChromeOptions | ||||
|  | ||||
|         self.driver = webdriver.Remote( | ||||
|             command_executor=self.command_executor, | ||||
|             desired_capabilities=DesiredCapabilities.CHROME) | ||||
|             options=ChromeOptions()) | ||||
|  | ||||
|         # driver.quit() seems to cause better exceptions | ||||
|         self.quit() | ||||
|   | ||||
| @@ -15,14 +15,20 @@ from wtforms import ( | ||||
|     validators, | ||||
|     widgets | ||||
| ) | ||||
| from flask_wtf.file import FileField, FileAllowed | ||||
| from wtforms.fields import FieldList | ||||
|  | ||||
| from wtforms.validators import ValidationError | ||||
|  | ||||
| from validators.url import url as url_validator | ||||
|  | ||||
|  | ||||
| # default | ||||
| # each select <option data-enabled="enabled-0-0" | ||||
| from changedetectionio.blueprint.browser_steps.browser_steps import browser_step_ui_config | ||||
|  | ||||
| from changedetectionio import content_fetcher | ||||
| from changedetectionio import content_fetcher, html_tools | ||||
|  | ||||
| from changedetectionio.notification import ( | ||||
|     valid_notification_formats, | ||||
| ) | ||||
| @@ -40,7 +46,7 @@ valid_method = { | ||||
| } | ||||
|  | ||||
| default_method = 'GET' | ||||
|  | ||||
| allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False')) | ||||
|  | ||||
| class StringListField(StringField): | ||||
|     widget = widgets.TextArea() | ||||
| @@ -260,19 +266,23 @@ class validateURL(object): | ||||
|         self.message = message | ||||
|  | ||||
|     def __call__(self, form, field): | ||||
|         import validators | ||||
|         # If hosts that only contain alphanumerics are allowed ("localhost" for example) | ||||
|         allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False')) | ||||
|         try: | ||||
|             validators.url(field.data.strip(), simple_host=allow_simplehost) | ||||
|         except validators.ValidationFailure: | ||||
|             message = field.gettext('\'%s\' is not a valid URL.' % (field.data.strip())) | ||||
|             raise ValidationError(message) | ||||
|         # This should raise a ValidationError() or not | ||||
|         validate_url(field.data) | ||||
|  | ||||
|         from .model.Watch import is_safe_url | ||||
|         if not is_safe_url(field.data): | ||||
|             raise ValidationError('Watch protocol is not permitted by SAFE_PROTOCOL_REGEX') | ||||
| def validate_url(test_url): | ||||
|     # If hosts that only contain alphanumerics are allowed ("localhost" for example) | ||||
|     try: | ||||
|         url_validator(test_url, simple_host=allow_simplehost) | ||||
|     except validators.ValidationError: | ||||
|         #@todo check for xss | ||||
|         message = f"'{test_url}' is not a valid URL." | ||||
|         # This should be wtforms.validators. | ||||
|         raise ValidationError(message) | ||||
|  | ||||
|     from .model.Watch import is_safe_url | ||||
|     if not is_safe_url(test_url): | ||||
|         # This should be wtforms.validators. | ||||
|         raise ValidationError('Watch protocol is not permitted by SAFE_PROTOCOL_REGEX or incorrect URL format') | ||||
|  | ||||
| class ValidateListRegex(object): | ||||
|     """ | ||||
| @@ -284,11 +294,10 @@ class ValidateListRegex(object): | ||||
|     def __call__(self, form, field): | ||||
|  | ||||
|         for line in field.data: | ||||
|             if line[0] == '/' and line[-1] == '/': | ||||
|                 # Because internally we dont wrap in / | ||||
|                 line = line.strip('/') | ||||
|             if re.search(html_tools.PERL_STYLE_REGEX, line, re.IGNORECASE): | ||||
|                 try: | ||||
|                     re.compile(line) | ||||
|                     regex = html_tools.perl_style_slash_enclosed_regex_to_options(line) | ||||
|                     re.compile(regex) | ||||
|                 except re.error: | ||||
|                     message = field.gettext('RegEx \'%s\' is not a valid regular expression.') | ||||
|                     raise ValidationError(message % (line)) | ||||
| @@ -398,6 +407,9 @@ class importForm(Form): | ||||
|     from . import processors | ||||
|     processor = RadioField(u'Processor', choices=processors.available_processors(), default="text_json_diff") | ||||
|     urls = TextAreaField('URLs') | ||||
|     xlsx_file = FileField('Upload .xlsx file', validators=[FileAllowed(['xlsx'], 'Must be .xlsx file!')]) | ||||
|     file_mapping = SelectField('File mapping', [validators.DataRequired()], choices={('wachete', 'Wachete mapping'), ('custom','Custom mapping')}) | ||||
|  | ||||
|  | ||||
| class SingleBrowserStep(Form): | ||||
|  | ||||
|   | ||||
| @@ -1,6 +1,9 @@ | ||||
| from abc import ABC, abstractmethod | ||||
| import time | ||||
| import validators | ||||
| from wtforms import ValidationError | ||||
|  | ||||
| from changedetectionio.forms import validate_url | ||||
|  | ||||
|  | ||||
| class Importer(): | ||||
| @@ -12,6 +15,7 @@ class Importer(): | ||||
|         self.new_uuids = [] | ||||
|         self.good = 0 | ||||
|         self.remaining_data = [] | ||||
|         self.import_profile = None | ||||
|  | ||||
|     @abstractmethod | ||||
|     def run(self, | ||||
| @@ -132,3 +136,145 @@ class import_distill_io_json(Importer): | ||||
|                     good += 1 | ||||
|  | ||||
|         flash("{} Imported from Distill.io in {:.2f}s, {} Skipped.".format(len(self.new_uuids), time.time() - now, len(self.remaining_data))) | ||||
|  | ||||
| class import_xlsx_wachete(Importer): | ||||
|  | ||||
|     def run(self, | ||||
|             data, | ||||
|             flash, | ||||
|             datastore, | ||||
|             ): | ||||
|         good = 0 | ||||
|         now = time.time() | ||||
|         self.new_uuids = [] | ||||
|  | ||||
|         from openpyxl import load_workbook | ||||
|  | ||||
|         try: | ||||
|             wb = load_workbook(data) | ||||
|         except Exception as e: | ||||
|             #@todo correct except | ||||
|             flash("Unable to read export XLSX file, something wrong with the file?", 'error') | ||||
|             return | ||||
|  | ||||
|         sheet_obj = wb.active | ||||
|  | ||||
|         i = 1 | ||||
|         row = 2 | ||||
|         while sheet_obj.cell(row=row, column=1).value: | ||||
|             data = {} | ||||
|             while sheet_obj.cell(row=row, column=i).value: | ||||
|                 column_title = sheet_obj.cell(row=1, column=i).value.strip().lower() | ||||
|                 column_row_value = sheet_obj.cell(row=row, column=i).value | ||||
|                 data[column_title] = column_row_value | ||||
|  | ||||
|                 i += 1 | ||||
|  | ||||
|             extras = {} | ||||
|             if data.get('xpath'): | ||||
|                 #@todo split by || ? | ||||
|                 extras['include_filters'] = [data.get('xpath')] | ||||
|             if data.get('name'): | ||||
|                 extras['title'] = [data.get('name').strip()] | ||||
|             if data.get('interval (min)'): | ||||
|                 minutes = int(data.get('interval (min)')) | ||||
|                 hours, minutes = divmod(minutes, 60) | ||||
|                 days, hours = divmod(hours, 24) | ||||
|                 weeks, days = divmod(days, 7) | ||||
|                 extras['time_between_check'] = {'weeks': weeks, 'days': days, 'hours': hours, 'minutes': minutes, 'seconds': 0} | ||||
|  | ||||
|  | ||||
|             # At minimum a URL is required. | ||||
|             if data.get('url'): | ||||
|                 try: | ||||
|                     validate_url(data.get('url')) | ||||
|                 except ValidationError as e: | ||||
|                     print(">> import URL error", data.get('url'), str(e)) | ||||
|                     # Don't bother processing anything else on this row | ||||
|                     continue | ||||
|  | ||||
|                 new_uuid = datastore.add_watch(url=data['url'].strip(), | ||||
|                                                extras=extras, | ||||
|                                                tag=data.get('folder'), | ||||
|                                                write_to_disk_now=False) | ||||
|                 if new_uuid: | ||||
|                     # Straight into the queue. | ||||
|                     self.new_uuids.append(new_uuid) | ||||
|                     good += 1 | ||||
|  | ||||
|             row += 1 | ||||
|             i = 1 | ||||
|  | ||||
|  | ||||
|         flash( | ||||
|             "{} imported from Wachete .xlsx in {:.2f}s".format(len(self.new_uuids), time.time() - now)) | ||||
|  | ||||
class import_xlsx_custom(Importer):
    """Import watches from an .xlsx spreadsheet using a custom column mapping.

    The mapping is read from ``self.import_profile``, which must be set on the
    instance before ``run()`` is called. It is a dict whose keys are the
    spreadsheet column numbers (as accepted by openpyxl's ``cell(column=...)``)
    and whose values name what that column holds:

    * ``'url'``               - the URL to watch (required for a row to be imported)
    * ``'tag'``               - passed to ``datastore.add_watch`` as ``tag``
    * ``'include_filters'``   - stored as a single-item ``include_filters`` list
    * ``'interval_minutes'``  - converted to a ``time_between_check`` dict
    * anything else           - copied into the watch extras under that key
    """

    def run(self,
            data,
            flash,
            datastore,
            ):
        """Read the workbook and add one watch per row that has a valid URL.

        Rows are read starting at row 2 (row 1 is skipped) and reading stops at
        the first row whose first column is empty. UUIDs returned by
        ``datastore.add_watch`` are collected in ``self.new_uuids``.

        :param data: workbook source handed directly to ``openpyxl.load_workbook``
        :param flash: callable used to report the result, called as
                      ``flash(message)`` or ``flash(message, 'error')``
        :param datastore: object providing ``add_watch(url=, extras=, tag=, write_to_disk_now=)``
        :return: None
        """
        now = time.time()
        self.new_uuids = []

        from openpyxl import load_workbook

        try:
            wb = load_workbook(data)
        except Exception:
            # @todo catch only the exceptions openpyxl raises for an unreadable file
            flash("Unable to read export XLSX file, something wrong with the file?", 'error')
            return

        # @todo check at least 2 rows, same in other method

        sheet_obj = wb.active
        from .forms import validate_url
        row = 2
        while sheet_obj.cell(row=row, column=1).value:
            url = None
            tags = None
            extras = {}
            for col_i, cell_map in self.import_profile.items():
                cell_val = sheet_obj.cell(row=row, column=col_i).value

                # An empty cell has nothing to import; calling .strip()/int() on it
                # would raise and abort the whole import.
                if cell_val is None:
                    if cell_map == 'url':
                        # No URL on this row, don't bother processing anything else
                        url = None
                        break
                    continue

                # Cells can hold numbers or dates as well as text, so normalise to str
                text_val = str(cell_val).strip()

                if cell_map == 'url':
                    url = text_val
                    try:
                        validate_url(url)
                    except ValidationError as e:
                        print(">> Import URL error", url, str(e))
                        # Don't bother processing anything else on this row
                        url = None
                        break

                elif cell_map == 'tag':
                    tags = text_val
                elif cell_map in ('include_filters', 'include_filter'):
                    # The import form's <select> offers the singular 'include_filter',
                    # so accept both spellings and always store under 'include_filters'.
                    # @todo validate?
                    extras['include_filters'] = [text_val]
                elif cell_map == 'interval_minutes':
                    try:
                        total_minutes = int(cell_val)
                    except (TypeError, ValueError):
                        # Not a usable number: keep the row, just leave the interval unset
                        print(">> Import interval error", cell_val)
                        continue
                    hours, minutes = divmod(total_minutes, 60)
                    days, hours = divmod(hours, 24)
                    weeks, days = divmod(days, 7)
                    extras['time_between_check'] = {'weeks': weeks, 'days': days, 'hours': hours, 'minutes': minutes, 'seconds': 0}
                else:
                    extras[cell_map] = text_val

            # At minimum a URL is required.
            if url:
                new_uuid = datastore.add_watch(url=url,
                                               extras=extras,
                                               tag=tags,
                                               write_to_disk_now=False)
                if new_uuid:
                    # Straight into the queue.
                    self.new_uuids.append(new_uuid)

            row += 1

        flash(
            "{} imported from custom .xlsx in {:.2f}s".format(len(self.new_uuids), time.time() - now))
|   | ||||
| @@ -360,6 +360,8 @@ class ChangeDetectionStore: | ||||
|         if write_to_disk_now: | ||||
|             self.sync_to_json() | ||||
|  | ||||
|         print("added ", url) | ||||
|  | ||||
|         return new_uuid | ||||
|  | ||||
|     def visualselector_data_is_ready(self, watch_uuid): | ||||
|   | ||||
| @@ -85,6 +85,7 @@ | ||||
|               <a href="{{url_for('logout')}}" class="pure-menu-link">LOG OUT</a> | ||||
|             </li> | ||||
|           {% endif %} | ||||
|           {% if current_user.is_authenticated or not has_password %} | ||||
|           <li class="pure-menu-item pure-form" id="search-menu-item"> | ||||
|             <!-- We use GET here so it offers people a chance to set bookmarks etc --> | ||||
|             <form name="searchForm" action="" method="GET"> | ||||
| @@ -95,6 +96,7 @@ | ||||
|               </button> | ||||
|             </form> | ||||
|           </li> | ||||
|           {% endif %} | ||||
|           <li class="pure-menu-item"> | ||||
|             <button class="toggle-button" id ="toggle-light-mode" type="button" title="Toggle Light/Dark Mode"> | ||||
|               <span class="visually-hidden">Toggle light/dark mode</span> | ||||
|   | ||||
| @@ -455,15 +455,15 @@ Unavailable") }} | ||||
|                         <tbody> | ||||
|                         <tr> | ||||
|                             <td>Check count</td> | ||||
|                             <td>{{ watch.check_count }}</td> | ||||
|                             <td>{{ "{:,}".format( watch.check_count) }}</td> | ||||
|                         </tr> | ||||
|                         <tr> | ||||
|                             <td>Consecutive filter failures</td> | ||||
|                             <td>{{ watch.consecutive_filter_failures }}</td> | ||||
|                             <td>{{ "{:,}".format( watch.consecutive_filter_failures) }}</td> | ||||
|                         </tr> | ||||
|                         <tr> | ||||
|                             <td>History length</td> | ||||
|                             <td>{{ watch.history|length }}</td> | ||||
|                             <td>{{ "{:,}".format(watch.history|length) }}</td> | ||||
|                         </tr> | ||||
|                         <tr> | ||||
|                             <td>Last fetch time</td> | ||||
|   | ||||
| @@ -8,11 +8,12 @@ | ||||
|         <ul> | ||||
|             <li class="tab" id=""><a href="#url-list">URL List</a></li> | ||||
|             <li class="tab"><a href="#distill-io">Distill.io</a></li> | ||||
|             <li class="tab"><a href="#xlsx">.XLSX & Wachete</a></li> | ||||
|         </ul> | ||||
|     </div> | ||||
|  | ||||
|     <div class="box-wrap inner"> | ||||
|         <form class="pure-form pure-form-aligned" action="{{url_for('import_page')}}" method="POST"> | ||||
|         <form class="pure-form" action="{{url_for('import_page')}}" method="POST" enctype="multipart/form-data"> | ||||
|             <input type="hidden" name="csrf_token" value="{{ csrf_token() }}"> | ||||
|             <div class="tab-pane-inner" id="url-list"> | ||||
|                     <legend> | ||||
| @@ -79,6 +80,42 @@ | ||||
| " rows="25">{{ original_distill_json }}</textarea> | ||||
|  | ||||
|             </div> | ||||
|             <div class="tab-pane-inner" id="xlsx"> | ||||
|             <fieldset> | ||||
|                 <div class="pure-control-group"> | ||||
|                 {{ render_field(form.xlsx_file, class="processor") }} | ||||
|                 </div> | ||||
|                 <div class="pure-control-group"> | ||||
|                     {{ render_field(form.file_mapping, class="processor") }} | ||||
|                 </div> | ||||
|             </fieldset> | ||||
|                 <div class="pure-control-group"> | ||||
|                 <span class="pure-form-message-inline"> | ||||
|                     Table of custom column and data types mapping for the <strong>Custom mapping</strong> File mapping type. | ||||
|                 </span> | ||||
|                     <table style="border: 1px solid #aaa; padding: 0.5rem; border-radius: 4px;"> | ||||
|                         <tr> | ||||
|                             <td><strong>Column #</strong></td> | ||||
|                             {% for n in range(4) %} | ||||
|                                 <td><input type="number" name="custom_xlsx[col_{{n}}]" style="width: 4rem;" min="1"></td> | ||||
|                             {%  endfor %} | ||||
|                         </tr> | ||||
|                         <tr> | ||||
|                             <td><strong>Type</strong></td> | ||||
|                             {% for n in range(4) %} | ||||
|                                 <td><select name="custom_xlsx[col_type_{{n}}]"> | ||||
|                                     <option value="" style="color: #aaa"> -- none --</option> | ||||
|                                     <option value="url">URL</option> | ||||
|                                     <option value="title">Title</option> | ||||
|                                     <option value="include_filter">CSS/xPath filter</option> | ||||
|                                     <option value="tag">Group / Tag name(s)</option> | ||||
|                                     <option value="interval_minutes">Recheck time (minutes)</option> | ||||
|                                 </select></td> | ||||
|                             {%  endfor %} | ||||
|                         </tr> | ||||
|                     </table> | ||||
|                 </div> | ||||
|             </div> | ||||
|             <button type="submit" class="pure-button pure-input-1-2 pure-button-primary">Import</button> | ||||
|         </form> | ||||
|  | ||||
|   | ||||
| @@ -178,13 +178,18 @@ | ||||
|             </tbody> | ||||
|         </table> | ||||
|         <ul id="post-list-buttons"> | ||||
|             {% if errored_count %} | ||||
|             <li> | ||||
|                 <a href="{{url_for('index', with_errors=1, tag=request.args.get('tag')) }}" class="pure-button button-tag button-error ">With errors ({{ errored_count }})</a> | ||||
|             </li> | ||||
|             {% endif %} | ||||
|             {% if has_unviewed %} | ||||
|             <li> | ||||
|                 <a href="{{url_for('mark_all_viewed', tag=request.args.get('tag')) }}" class="pure-button button-tag ">Mark all viewed</a> | ||||
|                 <a href="{{url_for('mark_all_viewed',with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag ">Mark all viewed</a> | ||||
|             </li> | ||||
|             {% endif %} | ||||
|             <li> | ||||
|                <a href="{{ url_for('form_watch_checknow', tag=active_tag) }}" class="pure-button button-tag ">Recheck | ||||
|                <a href="{{ url_for('form_watch_checknow', tag=active_tag, with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag ">Recheck | ||||
|                 all {% if active_tag%} in "{{tags[active_tag].title}}"{%endif%}</a> | ||||
|             </li> | ||||
|             <li> | ||||
|   | ||||
							
								
								
									
										
											BIN
										
									
								
								changedetectionio/tests/import/spreadsheet.xlsx
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								changedetectionio/tests/import/spreadsheet.xlsx
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| @@ -1,4 +1,4 @@ | ||||
| from . util import live_server_setup, extract_UUID_from_client | ||||
| from .util import live_server_setup, extract_UUID_from_client, wait_for_all_checks | ||||
| from flask import url_for | ||||
| import time | ||||
|  | ||||
| @@ -19,10 +19,16 @@ def test_check_access_control(app, client, live_server): | ||||
|         ) | ||||
|  | ||||
|         assert b"1 Imported" in res.data | ||||
|         time.sleep(2) | ||||
|         res = client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|         time.sleep(3) | ||||
|         # causes a 'Popped wrong request context.' error when client. is accessed? | ||||
|         #wait_for_all_checks(client) | ||||
|  | ||||
|         res = c.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|         assert b'1 watches queued for rechecking.' in res.data | ||||
|         time.sleep(2) | ||||
|         time.sleep(3) | ||||
|         # causes a 'Popped wrong request context.' error when client. is accessed? | ||||
|         #wait_for_all_checks(client) | ||||
|  | ||||
|  | ||||
|         # Enable password check and diff page access bypass | ||||
|         res = c.post( | ||||
| @@ -42,7 +48,7 @@ def test_check_access_control(app, client, live_server): | ||||
|         assert b"Login" in res.data | ||||
|  | ||||
|         # The diff page should return something valid when logged out | ||||
|         res = client.get(url_for("diff_history_page", uuid="first")) | ||||
|         res = c.get(url_for("diff_history_page", uuid="first")) | ||||
|         assert b'Random content' in res.data | ||||
|  | ||||
|         # Check wrong password does not let us in | ||||
| @@ -83,6 +89,8 @@ def test_check_access_control(app, client, live_server): | ||||
|         res = c.get(url_for("logout"), | ||||
|             follow_redirects=True) | ||||
|  | ||||
|         assert b"Login" in res.data | ||||
|  | ||||
|         res = c.get(url_for("settings_page"), | ||||
|             follow_redirects=True) | ||||
|  | ||||
| @@ -160,5 +168,5 @@ def test_check_access_control(app, client, live_server): | ||||
|         assert b"Login" in res.data | ||||
|  | ||||
|         # The diff page should return something valid when logged out | ||||
|         res = client.get(url_for("diff_history_page", uuid="first")) | ||||
|         res = c.get(url_for("diff_history_page", uuid="first")) | ||||
|         assert b'Random content' not in res.data | ||||
|   | ||||
| @@ -202,3 +202,35 @@ def test_check_filter_and_regex_extract(client, live_server): | ||||
|  | ||||
|     # Should not be here | ||||
|     assert b'Some text that did change' not in res.data | ||||
|  | ||||
|  | ||||
|  | ||||
def test_regex_error_handling(client, live_server):
    """An invalid regular expression in 'extract_text' must be reported by the edit page."""

    #live_server_setup(live_server)

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in res.data

    ### test regex error handling
    res = client.post(
        url_for("edit_page", uuid="first"),
        # Raw string: '\d' is not a valid escape sequence in a normal string literal.
        # The unterminated '{3' quantifier is what makes this regex invalid on purpose.
        data={"extract_text": r'/something bad\d{3/XYZ',
              "url": test_url,
              "fetch_backend": "html_requests"},
        follow_redirects=True
    )

    assert b'is not a valid regular expression.' in res.data

    # Clean up so following tests start without this watch
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
|   | ||||
| @@ -1,16 +1,19 @@ | ||||
| #!/usr/bin/python3 | ||||
|  | ||||
| import io | ||||
| import os | ||||
| import time | ||||
|  | ||||
| from flask import url_for | ||||
|  | ||||
| from .util import live_server_setup | ||||
| from .util import live_server_setup, wait_for_all_checks | ||||
|  | ||||
|  | ||||
def test_setup(client, live_server):
    """Call live_server_setup() on the live_server fixture.

    NOTE(review): other tests visible in this module have their own
    live_server_setup() call commented out, so they presumably rely on this
    one having run first - confirm.
    """
    live_server_setup(live_server)
|  | ||||
| def test_import(client, live_server): | ||||
|     # Give the endpoint time to spin up | ||||
|     time.sleep(1) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     res = client.post( | ||||
|         url_for("import_page"), | ||||
| @@ -119,3 +122,82 @@ def test_import_distillio(client, live_server): | ||||
|     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) | ||||
|     # Clear flask alerts | ||||
|     res = client.get(url_for("index")) | ||||
|  | ||||
def test_import_custom_xlsx(client, live_server):
    """Upload the sample spreadsheet with a custom column mapping and check the created watches.

    The posted mapping is: column 1 = title, column 3 = url, column 5 = include_filters,
    column 4 = interval_minutes.
    """

    #live_server_setup(live_server)
    dirname = os.path.dirname(__file__)
    filename = os.path.join(dirname, 'import/spreadsheet.xlsx')
    with open(filename, 'rb') as f:

        data = {
            'file_mapping': 'custom',
            'custom_xlsx[col_0]': '1',
            'custom_xlsx[col_1]': '3',
            'custom_xlsx[col_2]': '5',
            'custom_xlsx[col_3]': '4',
            'custom_xlsx[col_type_0]': 'title',
            'custom_xlsx[col_type_1]': 'url',
            'custom_xlsx[col_type_2]': 'include_filters',
            'custom_xlsx[col_type_3]': 'interval_minutes',
            'xlsx_file': (io.BytesIO(f.read()), 'spreadsheet.xlsx')
        }

    res = client.post(
        url_for("import_page"),
        data=data,
        follow_redirects=True,
    )

    assert b'2 imported from custom .xlsx' in res.data

    res = client.get(
        url_for("index")
    )

    assert b'Somesite results ABC' in res.data
    assert b'City news results' in res.data

    # Just find one to check over
    checked = False
    for uuid, watch in live_server.app.config['DATASTORE'].data['watching'].items():
        if watch.get('title') == 'Somesite results ABC':
            filters = watch.get('include_filters')
            assert filters[0] == '/html[1]/body[1]/div[4]/div[1]/div[1]/div[1]||//*[@id=\'content\']/div[3]/div[1]/div[1]||//*[@id=\'content\']/div[1]'
            assert watch.get('time_between_check') == {'weeks': 0, 'days': 1, 'hours': 6, 'minutes': 24, 'seconds': 0}
            checked = True

    # Without this the assertions above pass vacuously when no watch matches the title
    assert checked, "No imported watch titled 'Somesite results ABC' was found to verify"
|  | ||||
def test_import_watchete_xlsx(client, live_server):
    """Upload the sample spreadsheet using the 'wachete' file mapping and check the resulting watches."""

    #live_server_setup(live_server)
    xlsx_path = os.path.join(os.path.dirname(__file__), 'import/spreadsheet.xlsx')
    with open(xlsx_path, 'rb') as fh:
        xlsx_bytes = fh.read()

    form_data = {
        'file_mapping': 'wachete',
        'xlsx_file': (io.BytesIO(xlsx_bytes), 'spreadsheet.xlsx'),
    }
    res = client.post(url_for("import_page"), data=form_data, follow_redirects=True)
    assert b'2 imported from Wachete .xlsx' in res.data

    # Both spreadsheet rows should be listed on the overview page
    res = client.get(url_for("index"))
    for expected_title in (b'Somesite results ABC', b'City news results'):
        assert expected_title in res.data

    # Inspect the stored settings of one known row
    expected_filter = "/html[1]/body[1]/div[4]/div[1]/div[1]/div[1]||//*[@id='content']/div[3]/div[1]/div[1]||//*[@id='content']/div[1]"
    expected_interval = {'weeks': 0, 'days': 1, 'hours': 6, 'minutes': 24, 'seconds': 0}
    watching = live_server.app.config['DATASTORE'].data['watching']
    for watch in watching.values():
        if watch.get('title') != 'Somesite results ABC':
            continue
        assert watch.get('include_filters')[0] == expected_filter
        assert watch.get('time_between_check') == expected_interval
|   | ||||
| @@ -1,18 +1,19 @@ | ||||
| #!/usr/bin/python3 | ||||
|  | ||||
| import time | ||||
| import os | ||||
| from flask import url_for | ||||
| from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client | ||||
|  | ||||
def test_setup(client, live_server):
    """Call live_server_setup() on the live_server fixture.

    NOTE(review): test_basic_browserstep in this module has its own
    live_server_setup() call commented out, so it presumably relies on this
    one having run first - confirm.
    """
    live_server_setup(live_server)
|  | ||||
| # Add a site in paused mode, add an invalid filter, we should still have visual selector data ready | ||||
| def test_visual_selector_content_ready(client, live_server): | ||||
|     import os | ||||
|     import json | ||||
|  | ||||
|     assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test" | ||||
|     time.sleep(1) | ||||
|     live_server_setup(live_server) | ||||
|  | ||||
|  | ||||
|     # Add our URL to the import page, because the docker container (playwright/selenium) won't be able to connect to our usual test url | ||||
|     test_url = "https://changedetection.io/ci-test/test-runjs.html" | ||||
| @@ -60,4 +61,75 @@ def test_visual_selector_content_ready(client, live_server): | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b'notification_screenshot' in res.data | ||||
|     client.get( | ||||
|         url_for("form_delete", uuid="all"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
def test_basic_browserstep(client, live_server):
    """Run a watch with browser steps (open the site, click a button) and check the outcome.

    First verifies the preview shows the text that appears after the button click,
    then points the same watch at a 404 URL and verifies the overview reports the error.
    """

    assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test"
    #live_server_setup(live_server)

    # A public URL is used because the docker container (playwright/selenium) won't be able to reach our usual test url
    test_url = "https://changedetection.io/ci-test/test-runjs.html"

    def browser_steps_form(target_url):
        # Edit-form payload shared by both submissions; only the watched URL differs
        return {
            "url": target_url,
            "tags": "",
            "headers": "",
            'fetch_backend': "html_webdriver",
            'browser_steps-0-operation': 'Goto site',
            'browser_steps-1-operation': 'Click element',
            'browser_steps-1-selector': 'button[name=test-button]',
            'browser_steps-1-optional_value': '',
        }

    quick_add = {"url": test_url, "tags": '', 'edit_and_watch_submit_button': 'Edit > Watch'}
    res = client.post(url_for("form_quick_watch_add"), data=quick_add, follow_redirects=True)
    assert b"Watch added in Paused state, saving will unpause" in res.data

    res = client.post(
        url_for("edit_page", uuid="first", unpause_on_save=1),
        data=browser_steps_form(test_url),
        follow_redirects=True,
    )
    assert b"unpaused" in res.data
    wait_for_all_checks(client)

    uuid = extract_UUID_from_client(client)

    # The preview should show the page as it is after the button was clicked
    res = client.get(url_for("preview_page", uuid=uuid), follow_redirects=True)
    assert b"This text should be removed" not in res.data
    assert b"I smell JavaScript because the button was pressed" in res.data

    # Now test for 404 errors
    res = client.post(
        url_for("edit_page", uuid=uuid, unpause_on_save=1),
        data=browser_steps_form("https://changedetection.io/404"),
        follow_redirects=True,
    )
    assert b"unpaused" in res.data
    wait_for_all_checks(client)

    res = client.get(url_for("index"))
    assert b'Error - 404' in res.data

    # Remove the watch again
    client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
| @@ -68,7 +68,7 @@ services: | ||||
|  | ||||
| #    browser-chrome: | ||||
| #        hostname: browser-chrome | ||||
| #        image: selenium/standalone-chrome-debug:3.141.59 | ||||
| #        image: selenium/standalone-chrome:4 | ||||
| #        environment: | ||||
| #            - VNC_NO_PASSWORD=1 | ||||
| #            - SCREEN_WIDTH=1920 | ||||
|   | ||||
| @@ -1,12 +1,12 @@ | ||||
| eventlet>=0.31.0 | ||||
| feedgen~=0.9 | ||||
| flask-compress | ||||
| flask-login~=0.5 | ||||
| flask-login~=0.6 | ||||
| flask-paginate | ||||
| flask_expects_json~=1.7 | ||||
| flask_restful | ||||
| flask_wtf | ||||
| flask~=2.0 | ||||
| flask~=2.3 | ||||
| inscriptis~=2.2 | ||||
| pytz | ||||
| timeago~=1.0 | ||||
| @@ -49,18 +49,14 @@ beautifulsoup4 | ||||
| # XPath filtering, lxml is required by bs4 anyway, but put it here to be safe. | ||||
| lxml | ||||
|  | ||||
| # 3.141 was missing socksVersion, 3.150 was not in pypi, so we try 4.1.0 | ||||
| selenium~=4.1.0 | ||||
| selenium~=4.14.0 | ||||
|  | ||||
| # https://stackoverflow.com/questions/71652965/importerror-cannot-import-name-safe-str-cmp-from-werkzeug-security/71653849#71653849 | ||||
| # ImportError: cannot import name 'safe_str_cmp' from 'werkzeug.security' | ||||
| # need to revisit flask login versions | ||||
| werkzeug~=2.0.0 | ||||
| werkzeug | ||||
|  | ||||
| # Templating, so far just in the URLs but in the future can be for the notifications also | ||||
| jinja2~=3.1 | ||||
| jinja2-time | ||||
|  | ||||
| openpyxl | ||||
| # https://peps.python.org/pep-0508/#environment-markers | ||||
| # https://github.com/dgtlmoon/changedetection.io/pull/1009 | ||||
| jq~=1.3; python_version >= "3.8" and sys_platform == "darwin" | ||||
|   | ||||
		Reference in New Issue
	
	Block a user