Mirror of https://github.com/dgtlmoon/changedetection.io.git (synced 2025-10-30 22:27:52 +00:00)

Compare commits: browserste...plugin-2nd (1 commit)

Commit: 70842193b0
@@ -16,6 +16,7 @@ import logging
import os
import pytz
import queue
import sys
import threading
import time
import timeago
@@ -80,6 +81,9 @@ csrf = CSRFProtect()
csrf.init_app(app)
notification_debug_log=[]

from pathlib import Path
sys.path.append(os.path.join(Path.home(), 'changedetectionio-plugins'))

watch_api = Api(app, decorators=[csrf.exempt])

def init_app_secret(datastore_path):
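Appending ~/changedetectionio-plugins to sys.path means any package dropped into that folder becomes importable by name. Combined with the update_worker changes further down (an import of "<processor>.processor" plus a getattr for perform_site_check), a plugin could plausibly be laid out as in the following sketch. The cdio_whois_diff name is just the test value hard-coded later in this diff, and the class body here is illustrative, not part of the commit:

# ~/changedetectionio-plugins/cdio_whois_diff/processor.py
# (an empty __init__.py next to it makes the folder an importable package)
import hashlib

from changedetectionio.processors import text_content_difference_detection_processor


class perform_site_check(text_content_difference_detection_processor):
    # update_worker resolves exactly this attribute name via getattr(module, 'perform_site_check')

    def run_changedetection(self, uuid, skip_when_checksum_same=True):
        update_obj = {'last_notification_error': False, 'last_error': False}

        # self.fetcher is attached by the base class (call_browser()/fetch_content());
        # assuming it exposes .content the way the built-in fetchers do
        contents = (self.fetcher.content or '').encode('utf-8')

        fetched_md5 = hashlib.md5(contents).hexdigest()
        changed_detected = self.watch.get('previous_md5') != fetched_md5
        update_obj['previous_md5'] = fetched_md5

        return changed_detected, update_obj, contents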
@@ -76,7 +76,7 @@ class Watch(Resource):
        # Properties are not returned as a JSON, so add the required props manually
        watch['history_n'] = watch.history_n
        watch['last_changed'] = watch.last_changed

        watch['viewed'] = watch.viewed
        return watch

    @auth.check_token
@@ -97,7 +97,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
                                                   proxy=proxy)

        # For test
        #browsersteps_start_session['browserstepper'].action_goto_url(value="http://example.com?time="+str(time.time()))
        #browsersteps_start_session['browserstepper'].action_goto_url(value="http://exbaseample.com?time="+str(time.time()))

        return browsersteps_start_session
@@ -41,7 +41,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
        now = time.time()
        try:
            update_handler = text_json_diff.perform_site_check(datastore=datastore, watch_uuid=uuid)
            update_handler.call_browser()
            update_handler.fetch_content()
            # title, size is len contents not len xfer
        except content_fetcher.Non200ErrorCodeReceived as e:
            if e.status_code == 404:
@@ -4,10 +4,8 @@ import hashlib
import re
from changedetectionio import content_fetcher
from copy import deepcopy
from distutils.util import strtobool

class difference_detection_processor():
class difference_detection_processor_interface():
    browser_steps = None
    datastore = None
    fetcher = None
@@ -15,52 +13,36 @@ class difference_detection_processor():
    watch = None
    xpath_data = None

    def __init__(self, *args, datastore, watch_uuid, **kwargs):
        super().__init__(*args, **kwargs)

    @abstractmethod
    def run_changedetection(self, uuid, skip_when_checksum_same=True):
        update_obj = {'last_notification_error': False, 'last_error': False}
        some_data = 'xxxxx'
        update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
        changed_detected = False
        return changed_detected, update_obj, ''.encode('utf-8')


class text_content_difference_detection_processor(difference_detection_processor_interface):

    def __init__(self, *args, datastore, watch_uuid, prefer_fetch_backend, **kwargs):
        self.datastore = datastore
        self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))
        self.prefer_fetch_backend = prefer_fetch_backend
        super().__init__(*args, **kwargs)

    def call_browser(self):

        # Protect against file:// access
        if re.search(r'^file://', self.watch.get('url', '').strip(), re.IGNORECASE):
            if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
                raise Exception(
                    "file:// type access is denied for security reasons."
                )

        url = self.watch.link

        # Requests, playwright, other browser via wss:// etc, fetch_extra_something
        prefer_fetch_backend = self.watch.get('fetch_backend', 'system')

        # Proxy ID "key"
        preferred_proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=self.watch.get('uuid'))

        # Pluggable content self.fetcher
        if not prefer_fetch_backend or prefer_fetch_backend == 'system':
            prefer_fetch_backend = self.datastore.data['settings']['application'].get('fetch_backend')

        # In the case that the preferred fetcher was a browser config with a custom connection URL..
        # @todo - on save watch, if it's extra_browser_ then it should be obvious it will use playwright (like if it's requests now..)
        browser_connection_url = None
        if prefer_fetch_backend.startswith('extra_browser_'):
            (t, key) = prefer_fetch_backend.split('extra_browser_')
            connection = list(
                filter(lambda s: (s['browser_name'] == key), self.datastore.data['settings']['requests'].get('extra_browsers', [])))
            if connection:
                prefer_fetch_backend = 'base_html_playwright'
                browser_connection_url = connection[0].get('browser_connection_url')


        ########################################
        # Attach the correct fetcher and proxy #
        ########################################
        # Grab the right kind of 'fetcher', (playwright, requests, etc)
        if hasattr(content_fetcher, prefer_fetch_backend):
            fetcher_obj = getattr(content_fetcher, prefer_fetch_backend)
        if hasattr(content_fetcher, self.prefer_fetch_backend):
            fetcher_obj = getattr(content_fetcher, self.prefer_fetch_backend)
        else:
            # If the klass doesn't exist, just use a default
            fetcher_obj = getattr(content_fetcher, "html_requests")


        # Proxy ID "key"
        preferred_proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=self.watch.get('uuid'))
        proxy_url = None
        if preferred_proxy_id:
            proxy_url = self.datastore.proxy_list.get(preferred_proxy_id).get('url')
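The backend resolution inside call_browser() condenses to a small pure function. A sketch restating it with plain arguments; the argument shapes are assumptions, while the 'system' default, the 'base_html_playwright' fallback and the 'extra_browser_' prefix all come from the code above:

def resolve_fetch_backend(watch, app_settings, extra_browsers):
    # Decide which content_fetcher class name to use and whether a custom
    # browser connection URL (wss:// etc) overrides the default.
    prefer_fetch_backend = watch.get('fetch_backend', 'system')

    # 'system' (or empty) defers to the application-wide setting
    if not prefer_fetch_backend or prefer_fetch_backend == 'system':
        prefer_fetch_backend = app_settings.get('fetch_backend')

    browser_connection_url = None
    # 'extra_browser_<name>' points at a named extra browser entry and always implies playwright
    if prefer_fetch_backend.startswith('extra_browser_'):
        (_, key) = prefer_fetch_backend.split('extra_browser_')
        connection = [b for b in extra_browsers if b.get('browser_name') == key]
        if connection:
            prefer_fetch_backend = 'base_html_playwright'
            browser_connection_url = connection[0].get('browser_connection_url')

    return prefer_fetch_backend, browser_connection_url

# e.g. resolve_fetch_backend({'fetch_backend': 'extra_browser_mobile'},
#                            {'fetch_backend': 'html_requests'},
#                            [{'browser_name': 'mobile', 'browser_connection_url': 'wss://browserless:3000'}])
# -> ('base_html_playwright', 'wss://browserless:3000')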
@@ -69,9 +51,23 @@ class difference_detection_processor():
        # Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need.
        # When browser_connection_url is None, the method should default to working out the best defaults (os env vars etc)
        self.fetcher = fetcher_obj(proxy_override=proxy_url,
                                   browser_connection_url=browser_connection_url
                                   browser_connection_url=None # Default, let each fetcher work it out
                                   )

    def fetch_content(self):

        url = self.watch.link

        # In the case that the preferred fetcher was a browser config with a custom connection URL..
        # @todo - on save watch, if it's extra_browser_ then it should be obvious it will use playwright (like if it's requests now..)
        if self.prefer_fetch_backend.startswith('extra_browser_'):
            (t, key) = self.prefer_fetch_backend.split('extra_browser_')
            connection = list(
                filter(lambda s: (s['browser_name'] == key), self.datastore.data['settings']['requests'].get('extra_browsers', [])))
            if connection:
                prefer_fetch_backend = 'base_html_playwright'
                browser_connection_url = connection[0].get('browser_connection_url')

        if self.watch.has_browser_steps:
            self.fetcher.browser_steps = self.watch.get('browser_steps', [])
            self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid'))
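Taken together, the new base class gives every processor one lifecycle: construct it with a datastore, watch UUID and preferred fetch backend, let call_browser() pick and instantiate the fetcher, let fetch_content() perform the fetch (wiring in browser steps if the watch has any), then ask run_changedetection() for the verdict. A minimal driver sketch modelled on the update_worker changes below; note the blueprint hunk above swaps call_browser() for fetch_content(), so the exact split between those two calls may still change in this WIP:

from changedetectionio.processors import text_json_diff

def check_watch_once(datastore, uuid):
    # datastore: an existing ChangeDetectionStore, uuid: an existing watch UUID
    processor = text_json_diff.perform_site_check(
        datastore=datastore,
        watch_uuid=uuid,
        prefer_fetch_backend='system',  # or 'html_requests', 'base_html_playwright', 'extra_browser_<name>'
    )
    processor.call_browser()   # resolve backend + proxy, instantiate self.fetcher
    processor.fetch_content()  # run the fetch, including any browser steps
    # returns (changed_detected, update_obj, contents) as defined by the interface above
    return processor.run_changedetection(uuid, skip_when_checksum_same=False)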
@@ -115,14 +111,6 @@ class difference_detection_processor():

    # After init, call run_changedetection() which will do the actual change-detection

    @abstractmethod
    def run_changedetection(self, uuid, skip_when_checksum_same=True):
        update_obj = {'last_notification_error': False, 'last_error': False}
        some_data = 'xxxxx'
        update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
        changed_detected = False
        return changed_detected, update_obj, ''.encode('utf-8')


def available_processors():
    from . import restock_diff, text_json_diff
@@ -1,8 +1,9 @@

import hashlib
import urllib3
from . import difference_detection_processor
#from . import browser_content_difference_detection_processor
from copy import deepcopy
from . import text_content_difference_detection_processor

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
@@ -15,7 +16,7 @@ class UnableToExtractRestockData(Exception):
        self.status_code = status_code
        return

class perform_site_check(difference_detection_processor):
class perform_site_check(text_content_difference_detection_processor):
    screenshot = None
    xpath_data = None
@@ -10,8 +10,8 @@ import urllib3
from changedetectionio import content_fetcher, html_tools
from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
from copy import deepcopy
from . import difference_detection_processor
from ..html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text
from . import text_content_difference_detection_processor

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
@@ -31,7 +31,7 @@ class PDFToHTMLToolNotFound(ValueError):

# Some common stuff here that can be moved to a base class
# (set_proxy_from_list)
class perform_site_check(difference_detection_processor):
class perform_site_check(text_content_difference_detection_processor):

    def run_changedetection(self, uuid, skip_when_checksum_same=True):
        changed_detected = False
@@ -1,9 +1,13 @@
import importlib
import os
import re
import threading
import queue
import time
from distutils.util import strtobool

from changedetectionio import content_fetcher, html_tools

from .processors.text_json_diff import FilterNotFoundInResponse
from .processors.restock_diff import UnableToExtractRestockData
@@ -15,6 +19,7 @@ from .processors.restock_diff import UnableToExtractRestockData
import logging
import sys


class update_worker(threading.Thread):
    current_uuid = None
@@ -24,6 +29,7 @@ class update_worker(threading.Thread):
        self.app = app
        self.notification_q = notification_q
        self.datastore = datastore

        super().__init__(*args, **kwargs)

    def queue_notification_for_watch(self, n_object, watch):
@@ -209,7 +215,7 @@ class update_worker(threading.Thread):
        from .processors import text_json_diff, restock_diff

        while not self.app.config.exit.is_set():
            update_handler = None
            change_processor = None

            try:
                queued_item_data = self.q.get(block=False)
@@ -230,35 +236,46 @@ class update_worker(threading.Thread):
                now = time.time()

                try:
                    # Processor is what we are using for detecting the "Change"
                    # Protect against file:// access
                    if re.search(r'^file://', self.datastore.data['watching'][uuid].get('url', '').strip(), re.IGNORECASE):
                        if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
                            raise Exception(
                                "file:// type access is denied for security reasons."
                            )

                    prefer_fetch_backend = self.datastore.data['watching'][uuid].get('fetch_backend', 'system')
                    if not prefer_fetch_backend or prefer_fetch_backend == 'system':
                        prefer_fetch_backend = self.datastore.data['settings']['application'].get('fetch_backend')

                    processor = self.datastore.data['watching'][uuid].get('processor', 'text_json_diff')
                    # if system...

                    # Abort processing when the content was the same as the last fetch
                    skip_when_same_checksum = queued_item_data.item.get('skip_when_checksum_same')
                    processor = 'cdio_whois_diff'


                    # @todo some way to switch by name
                    # Init a new 'difference_detection_processor'

                    if processor == 'restock_diff':
                        update_handler = restock_diff.perform_site_check(datastore=self.datastore,
                                                                         watch_uuid=uuid
                                                                         )
                    if processor in ['text_json_diff', 'restock_diff']:
                        base_processor_module = f"changedetectionio.processors.{processor}"
                    else:
                        # Used as a default and also by some tests
                        update_handler = text_json_diff.perform_site_check(datastore=self.datastore,
                                                                           watch_uuid=uuid
                                                                           )
                        # Each plugin is one processor exactly
                        base_processor_module = f"{processor}.processor"

                    # it's correct that the processor dictates which fetcher it uses, I think

                    # these should inherit the right fetcher too
                    module = importlib.import_module(base_processor_module)
                    change_processor = getattr(module, 'perform_site_check')
                    change_processor = change_processor(datastore=self.datastore,
                                                        watch_uuid=uuid,
                                                        prefer_fetch_backend=prefer_fetch_backend
                                                        )

                    # Clear last errors (move to preflight func?)
                    self.datastore.data['watching'][uuid]['browser_steps_last_error_step'] = None

                    update_handler.call_browser()

                    changed_detected, update_obj, contents = update_handler.run_changedetection(uuid,
                                                                                                skip_when_checksum_same=skip_when_same_checksum,
                                                                                                )
                    skip_when_same_checksum = queued_item_data.item.get('skip_when_checksum_same')
                    # Each processor extends base class of the kind of fetcher it needs to run anyway
                    change_processor.fetch_content()
                    changed_detected, update_obj, contents = change_processor.run_changedetection(uuid,
                                                                                                  skip_when_checksum_same=skip_when_same_checksum
                                                                                                  )

                # Re #342
                # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
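The dispatch above reads more clearly as a helper: the two built-in processors resolve inside the changedetectionio package, anything else is treated as a plugin package named "<processor>.processor" found via the sys.path entry added in __init__.py. A sketch; the ModuleNotFoundError fallback is an assumption of this sketch, the diff itself does not guard the import:

import importlib

def resolve_processor_class(processor_name):
    if processor_name in ['text_json_diff', 'restock_diff']:
        # Built-in processors ship inside the package
        module_path = f"changedetectionio.processors.{processor_name}"
    else:
        # Each plugin is exactly one processor: <plugin_package>/processor.py
        module_path = f"{processor_name}.processor"

    try:
        module = importlib.import_module(module_path)
    except ModuleNotFoundError:
        # Hypothetical fallback, not in the diff: fall back to the default processor
        module = importlib.import_module("changedetectionio.processors.text_json_diff")

    # Every processor module is expected to expose a perform_site_check class
    return getattr(module, 'perform_site_check')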
@@ -465,10 +482,10 @@ class update_worker(threading.Thread):
                        })

                    # Always save the screenshot if it's available
                    if update_handler.screenshot:
                        self.datastore.save_screenshot(watch_uuid=uuid, screenshot=update_handler.screenshot)
                    if update_handler.xpath_data:
                        self.datastore.save_xpath_data(watch_uuid=uuid, data=update_handler.xpath_data)
                    if change_processor.screenshot:
                        self.datastore.save_screenshot(watch_uuid=uuid, screenshot=change_processor.screenshot)
                    if change_processor.xpath_data:
                        self.datastore.save_xpath_data(watch_uuid=uuid, data=change_processor.xpath_data)


            self.current_uuid = None  # Done