Mirror of https://github.com/dgtlmoon/changedetection.io.git

Compare commits: UI-browser...plugin-2nd (1 commit)
Commit: 70842193b0
@@ -16,6 +16,7 @@ import logging
import os
import pytz
import queue
import sys
import threading
import time
import timeago
@@ -80,6 +81,9 @@ csrf = CSRFProtect()
csrf.init_app(app)
notification_debug_log=[]

from pathlib import Path
sys.path.append(os.path.join(Path.home(), 'changedetectionio-plugins'))

watch_api = Api(app, decorators=[csrf.exempt])

def init_app_secret(datastore_path):

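This hunk puts ~/changedetectionio-plugins on sys.path, which is what lets the worker further down import a processor plugin as an ordinary top-level package. Below is a minimal sketch of the assumed layout and lookup; the plugin name cdio_whois_diff is taken only from the hard-coded test value later in this diff, and the file layout is an assumption.

```python
# Sketch only: the directory layout is assumed; the import pattern mirrors the
# importlib call added to update_worker.py in this commit.
#
#   ~/changedetectionio-plugins/
#       cdio_whois_diff/        # hypothetical plugin package
#           __init__.py
#           processor.py        # expected to expose a perform_site_check class
#
import importlib

plugin_name = "cdio_whois_diff"  # hypothetical; matches the test value hard-coded below
module = importlib.import_module(f"{plugin_name}.processor")
perform_site_check = getattr(module, "perform_site_check")
```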
@@ -76,7 +76,7 @@ class Watch(Resource):
        # Properties are not returned as a JSON, so add the required props manually
        watch['history_n'] = watch.history_n
        watch['last_changed'] = watch.last_changed

        watch['viewed'] = watch.viewed
        return watch

    @auth.check_token

@@ -97,7 +97,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
            proxy=proxy)

        # For test
        #browsersteps_start_session['browserstepper'].action_goto_url(value="http://example.com?time="+str(time.time()))
        #browsersteps_start_session['browserstepper'].action_goto_url(value="http://exbaseample.com?time="+str(time.time()))

        return browsersteps_start_session


@@ -41,7 +41,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
        now = time.time()
        try:
            update_handler = text_json_diff.perform_site_check(datastore=datastore, watch_uuid=uuid)
            update_handler.call_browser()
            update_handler.fetch_content()
        # title, size is len contents not len xfer
        except content_fetcher.Non200ErrorCodeReceived as e:
            if e.status_code == 404:

@@ -4,10 +4,8 @@ import hashlib
import re
from changedetectionio import content_fetcher
from copy import deepcopy
from distutils.util import strtobool

class difference_detection_processor():

class difference_detection_processor_interface():
    browser_steps = None
    datastore = None
    fetcher = None
@@ -15,52 +13,36 @@ class difference_detection_processor():
    watch = None
    xpath_data = None

    def __init__(self, *args, datastore, watch_uuid, **kwargs):
        super().__init__(*args, **kwargs)

    @abstractmethod
    def run_changedetection(self, uuid, skip_when_checksum_same=True):
        update_obj = {'last_notification_error': False, 'last_error': False}
        some_data = 'xxxxx'
        update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
        changed_detected = False
        return changed_detected, update_obj, ''.encode('utf-8')


class text_content_difference_detection_processor(difference_detection_processor_interface):

    def __init__(self, *args, datastore, watch_uuid, prefer_fetch_backend, **kwargs):
        self.datastore = datastore
        self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))
        self.prefer_fetch_backend = prefer_fetch_backend
        super().__init__(*args, **kwargs)

    def call_browser(self):

        # Protect against file:// access
        if re.search(r'^file://', self.watch.get('url', '').strip(), re.IGNORECASE):
            if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
                raise Exception(
                    "file:// type access is denied for security reasons."
                )

        url = self.watch.link

        # Requests, playwright, other browser via wss:// etc, fetch_extra_something
        prefer_fetch_backend = self.watch.get('fetch_backend', 'system')

        # Proxy ID "key"
        preferred_proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=self.watch.get('uuid'))

        # Pluggable content self.fetcher
        if not prefer_fetch_backend or prefer_fetch_backend == 'system':
            prefer_fetch_backend = self.datastore.data['settings']['application'].get('fetch_backend')

        # In the case that the preferred fetcher was a browser config with custom connection URL..
        # @todo - on save watch, if its extra_browser_ then it should be obvious it will use playwright (like if its requests now..)
        browser_connection_url = None
        if prefer_fetch_backend.startswith('extra_browser_'):
            (t, key) = prefer_fetch_backend.split('extra_browser_')
            connection = list(
                filter(lambda s: (s['browser_name'] == key), self.datastore.data['settings']['requests'].get('extra_browsers', [])))
            if connection:
                prefer_fetch_backend = 'base_html_playwright'
                browser_connection_url = connection[0].get('browser_connection_url')


        ########################################
        # Attach the correct fetcher and proxy #
        ########################################
        # Grab the right kind of 'fetcher', (playwright, requests, etc)
        if hasattr(content_fetcher, prefer_fetch_backend):
            fetcher_obj = getattr(content_fetcher, prefer_fetch_backend)
        if hasattr(content_fetcher, self.prefer_fetch_backend):
            fetcher_obj = getattr(content_fetcher, self.prefer_fetch_backend)
        else:
            # If the klass doesnt exist, just use a default
            fetcher_obj = getattr(content_fetcher, "html_requests")


        # Proxy ID "key"
        preferred_proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=self.watch.get('uuid'))
        proxy_url = None
        if preferred_proxy_id:
            proxy_url = self.datastore.proxy_list.get(preferred_proxy_id).get('url')
@@ -69,9 +51,23 @@ class difference_detection_processor():
        # Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need.
        # When browser_connection_url is None, it method should default to working out whats the best defaults (os env vars etc)
        self.fetcher = fetcher_obj(proxy_override=proxy_url,
                                   browser_connection_url=browser_connection_url
                                   browser_connection_url=None # Default, let each fetcher work it out
                                   )

    def fetch_content(self):

        url = self.watch.link

        # In the case that the preferred fetcher was a browser config with custom connection URL..
        # @todo - on save watch, if its extra_browser_ then it should be obvious it will use playwright (like if its requests now..)
        if self.prefer_fetch_backend.startswith('extra_browser_'):
            (t, key) = self.prefer_fetch_backend.split('extra_browser_')
            connection = list(
                filter(lambda s: (s['browser_name'] == key), self.datastore.data['settings']['requests'].get('extra_browsers', [])))
            if connection:
                prefer_fetch_backend = 'base_html_playwright'
                browser_connection_url = connection[0].get('browser_connection_url')

        if self.watch.has_browser_steps:
            self.fetcher.browser_steps = self.watch.get('browser_steps', [])
            self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid'))
@@ -115,14 +111,6 @@ class difference_detection_processor():

        # After init, call run_changedetection() which will do the actual change-detection

    @abstractmethod
    def run_changedetection(self, uuid, skip_when_checksum_same=True):
        update_obj = {'last_notification_error': False, 'last_error': False}
        some_data = 'xxxxx'
        update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
        changed_detected = False
        return changed_detected, update_obj, ''.encode('utf-8')


def available_processors():
    from . import restock_diff, text_json_diff

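With the base class split into difference_detection_processor_interface and text_content_difference_detection_processor shown above, a plugin's processor.py would subclass the text-content processor and override run_changedetection(). The following is a rough sketch under that assumption; only the class and method names come from this diff, while the self.fetcher.content attribute and the MD5 comparison are illustrative, modelled on the stub in the interface.

```python
# Hypothetical plugin processor.py - a sketch, not part of this commit.
import hashlib

from changedetectionio.processors import text_content_difference_detection_processor


class perform_site_check(text_content_difference_detection_processor):
    """Example detector: reports a change when the MD5 of the fetched content moves."""

    def run_changedetection(self, uuid, skip_when_checksum_same=True):
        update_obj = {'last_notification_error': False, 'last_error': False}

        # The worker calls fetch_content() first, so self.fetcher already holds the page
        # (assumes the fetcher exposes its body as `content`, either str or bytes).
        contents = self.fetcher.content
        if isinstance(contents, str):
            contents = contents.encode('utf-8')

        fetched_md5 = hashlib.md5(contents).hexdigest()
        changed_detected = fetched_md5 != self.watch.get('previous_md5')
        update_obj['previous_md5'] = fetched_md5

        return changed_detected, update_obj, contents
```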
@@ -1,8 +1,9 @@

import hashlib
import urllib3
from . import difference_detection_processor
#from . import browser_content_difference_detection_processor
from copy import deepcopy
from . import text_content_difference_detection_processor

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

@@ -15,7 +16,7 @@ class UnableToExtractRestockData(Exception):
        self.status_code = status_code
        return

class perform_site_check(difference_detection_processor):
class perform_site_check(text_content_difference_detection_processor):
    screenshot = None
    xpath_data = None


@@ -10,8 +10,8 @@ import urllib3
from changedetectionio import content_fetcher, html_tools
from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
from copy import deepcopy
from . import difference_detection_processor
from ..html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text
from . import text_content_difference_detection_processor

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

@@ -31,7 +31,7 @@ class PDFToHTMLToolNotFound(ValueError):

# Some common stuff here that can be moved to a base class
# (set_proxy_from_list)
class perform_site_check(difference_detection_processor):
class perform_site_check(text_content_difference_detection_processor):

    def run_changedetection(self, uuid, skip_when_checksum_same=True):
        changed_detected = False

@@ -1,9 +1,13 @@
import importlib
import os
import re
import threading
import queue
import time
from distutils.util import strtobool

from changedetectionio import content_fetcher, html_tools

from .processors.text_json_diff import FilterNotFoundInResponse
from .processors.restock_diff import UnableToExtractRestockData

@@ -15,6 +19,7 @@ from .processors.restock_diff import UnableToExtractRestockData
import logging
import sys


class update_worker(threading.Thread):
    current_uuid = None

@@ -24,6 +29,7 @@ class update_worker(threading.Thread):
        self.app = app
        self.notification_q = notification_q
        self.datastore = datastore

        super().__init__(*args, **kwargs)

    def queue_notification_for_watch(self, n_object, watch):
@@ -209,7 +215,7 @@ class update_worker(threading.Thread):
        from .processors import text_json_diff, restock_diff

        while not self.app.config.exit.is_set():
            update_handler = None
            change_processor = None

            try:
                queued_item_data = self.q.get(block=False)
@@ -230,35 +236,46 @@ class update_worker(threading.Thread):
                    now = time.time()

                    try:
                        # Processor is what we are using for detecting the "Change"
                        # Protect against file:// access
                        if re.search(r'^file://', self.datastore.data['watching'][uuid].get('url', '').strip(), re.IGNORECASE):
                            if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
                                raise Exception(
                                    "file:// type access is denied for security reasons."
                                )

                        prefer_fetch_backend = self.datastore.data['watching'][uuid].get('fetch_backend', 'system')
                        if not prefer_fetch_backend or prefer_fetch_backend == 'system':
                            prefer_fetch_backend = self.datastore.data['settings']['application'].get('fetch_backend')

                        processor = self.datastore.data['watching'][uuid].get('processor', 'text_json_diff')
                        # if system...

                        # Abort processing when the content was the same as the last fetch
                        skip_when_same_checksum = queued_item_data.item.get('skip_when_checksum_same')
                        processor = 'cdio_whois_diff'


                        # @todo some way to switch by name
                        # Init a new 'difference_detection_processor'

                        if processor == 'restock_diff':
                            update_handler = restock_diff.perform_site_check(datastore=self.datastore,
                                                                             watch_uuid=uuid
                                                                             )
                        if processor in ['text_json_diff', 'restock_diff']:
                            base_processor_module = f"changedetectionio.processors.{processor}"
                        else:
                            # Used as a default and also by some tests
                            update_handler = text_json_diff.perform_site_check(datastore=self.datastore,
                                                                               watch_uuid=uuid
                                                                               )
                            # Each plugin is one processor exactly
                            base_processor_module = f"{processor}.processor"

# its correct that processor dictates which fethcer it uses i think

                        # these should inherit the right fetcher too
                        module = importlib.import_module(base_processor_module)
                        change_processor = getattr(module, 'perform_site_check')
                        change_processor = change_processor(datastore=self.datastore,
                                                            watch_uuid=uuid,
                                                            prefer_fetch_backend=prefer_fetch_backend
                                                            )

                        # Clear last errors (move to preflight func?)
                        self.datastore.data['watching'][uuid]['browser_steps_last_error_step'] = None

                        update_handler.call_browser()

                        changed_detected, update_obj, contents = update_handler.run_changedetection(uuid,
                                                                                    skip_when_checksum_same=skip_when_same_checksum,
                                                                                    )
                        skip_when_same_checksum = queued_item_data.item.get('skip_when_checksum_same')
                        # Each processor extends base class of the kind of fetcher it needs to run anyway
                        change_processor.fetch_content()
                        changed_detected, update_obj, contents = change_processor.run_changedetection(uuid,
                                                                                                      skip_when_checksum_same=skip_when_same_checksum
                                                                                                      )

                        # Re #342
                        # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
@@ -465,10 +482,10 @@ class update_worker(threading.Thread):
                                                                           })

                        # Always save the screenshot if it's available
                        if update_handler.screenshot:
                            self.datastore.save_screenshot(watch_uuid=uuid, screenshot=update_handler.screenshot)
                        if update_handler.xpath_data:
                            self.datastore.save_xpath_data(watch_uuid=uuid, data=update_handler.xpath_data)
                        if change_processor.screenshot:
                            self.datastore.save_screenshot(watch_uuid=uuid, screenshot=change_processor.screenshot)
                        if change_processor.xpath_data:
                            self.datastore.save_xpath_data(watch_uuid=uuid, data=change_processor.xpath_data)


                self.current_uuid = None  # Done

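Read together, the update_worker changes reduce processor selection to a module path plus the conventional perform_site_check class name. A condensed sketch of that dispatch follows; the standalone helper is an illustration and not code from this commit, though every name it uses appears in the hunks above.

```python
# Condensed view of the dispatch added to update_worker.py (illustrative helper).
import importlib


def resolve_processor(processor_name):
    # Built-in processors live inside the changedetectionio package; anything else is
    # treated as an installed plugin package exposing a `processor` module, reachable
    # because ~/changedetectionio-plugins was appended to sys.path at startup.
    if processor_name in ['text_json_diff', 'restock_diff']:
        module_path = f"changedetectionio.processors.{processor_name}"
    else:
        module_path = f"{processor_name}.processor"
    return getattr(importlib.import_module(module_path), 'perform_site_check')


# Usage, mirroring the worker loop:
#   klass = resolve_processor(watch.get('processor', 'text_json_diff'))
#   change_processor = klass(datastore=datastore, watch_uuid=uuid,
#                            prefer_fetch_backend=prefer_fetch_backend)
#   change_processor.fetch_content()
#   changed, update_obj, contents = change_processor.run_changedetection(uuid)
```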