mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2025-11-10 03:27:14 +00:00
Compare commits
1 Commits
cross-plat
...
plugin-2nd
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
70842193b0 |
@@ -16,6 +16,7 @@ import logging
|
|||||||
import os
|
import os
|
||||||
import pytz
|
import pytz
|
||||||
import queue
|
import queue
|
||||||
|
import sys
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
import timeago
|
import timeago
|
||||||
@@ -80,6 +81,9 @@ csrf = CSRFProtect()
|
|||||||
csrf.init_app(app)
|
csrf.init_app(app)
|
||||||
notification_debug_log=[]
|
notification_debug_log=[]
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
sys.path.append(os.path.join(Path.home(), 'changedetectionio-plugins'))
|
||||||
|
|
||||||
watch_api = Api(app, decorators=[csrf.exempt])
|
watch_api = Api(app, decorators=[csrf.exempt])
|
||||||
|
|
||||||
def init_app_secret(datastore_path):
|
def init_app_secret(datastore_path):
|
||||||
|
|||||||
@@ -76,7 +76,7 @@ class Watch(Resource):
|
|||||||
# Properties are not returned as a JSON, so add the required props manually
|
# Properties are not returned as a JSON, so add the required props manually
|
||||||
watch['history_n'] = watch.history_n
|
watch['history_n'] = watch.history_n
|
||||||
watch['last_changed'] = watch.last_changed
|
watch['last_changed'] = watch.last_changed
|
||||||
|
watch['viewed'] = watch.viewed
|
||||||
return watch
|
return watch
|
||||||
|
|
||||||
@auth.check_token
|
@auth.check_token
|
||||||
|
|||||||
@@ -97,7 +97,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
proxy=proxy)
|
proxy=proxy)
|
||||||
|
|
||||||
# For test
|
# For test
|
||||||
#browsersteps_start_session['browserstepper'].action_goto_url(value="http://example.com?time="+str(time.time()))
|
#browsersteps_start_session['browserstepper'].action_goto_url(value="http://exbaseample.com?time="+str(time.time()))
|
||||||
|
|
||||||
return browsersteps_start_session
|
return browsersteps_start_session
|
||||||
|
|
||||||
|
|||||||
@@ -41,7 +41,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
now = time.time()
|
now = time.time()
|
||||||
try:
|
try:
|
||||||
update_handler = text_json_diff.perform_site_check(datastore=datastore, watch_uuid=uuid)
|
update_handler = text_json_diff.perform_site_check(datastore=datastore, watch_uuid=uuid)
|
||||||
update_handler.call_browser()
|
update_handler.fetch_content()
|
||||||
# title, size is len contents not len xfer
|
# title, size is len contents not len xfer
|
||||||
except content_fetcher.Non200ErrorCodeReceived as e:
|
except content_fetcher.Non200ErrorCodeReceived as e:
|
||||||
if e.status_code == 404:
|
if e.status_code == 404:
|
||||||
|
|||||||
@@ -4,10 +4,8 @@ import hashlib
|
|||||||
import re
|
import re
|
||||||
from changedetectionio import content_fetcher
|
from changedetectionio import content_fetcher
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
from distutils.util import strtobool
|
|
||||||
|
|
||||||
class difference_detection_processor():
|
|
||||||
|
|
||||||
|
class difference_detection_processor_interface():
|
||||||
browser_steps = None
|
browser_steps = None
|
||||||
datastore = None
|
datastore = None
|
||||||
fetcher = None
|
fetcher = None
|
||||||
@@ -15,52 +13,36 @@ class difference_detection_processor():
|
|||||||
watch = None
|
watch = None
|
||||||
xpath_data = None
|
xpath_data = None
|
||||||
|
|
||||||
def __init__(self, *args, datastore, watch_uuid, **kwargs):
|
|
||||||
super().__init__(*args, **kwargs)
|
@abstractmethod
|
||||||
|
def run_changedetection(self, uuid, skip_when_checksum_same=True):
|
||||||
|
update_obj = {'last_notification_error': False, 'last_error': False}
|
||||||
|
some_data = 'xxxxx'
|
||||||
|
update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
|
||||||
|
changed_detected = False
|
||||||
|
return changed_detected, update_obj, ''.encode('utf-8')
|
||||||
|
|
||||||
|
|
||||||
|
class text_content_difference_detection_processor(difference_detection_processor_interface):
|
||||||
|
|
||||||
|
def __init__(self, *args, datastore, watch_uuid, prefer_fetch_backend, **kwargs):
|
||||||
self.datastore = datastore
|
self.datastore = datastore
|
||||||
self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))
|
self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))
|
||||||
|
self.prefer_fetch_backend = prefer_fetch_backend
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
|
||||||
def call_browser(self):
|
########################################
|
||||||
|
# Attach the correct fetcher and proxy #
|
||||||
# Protect against file:// access
|
########################################
|
||||||
if re.search(r'^file://', self.watch.get('url', '').strip(), re.IGNORECASE):
|
|
||||||
if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
|
|
||||||
raise Exception(
|
|
||||||
"file:// type access is denied for security reasons."
|
|
||||||
)
|
|
||||||
|
|
||||||
url = self.watch.link
|
|
||||||
|
|
||||||
# Requests, playwright, other browser via wss:// etc, fetch_extra_something
|
|
||||||
prefer_fetch_backend = self.watch.get('fetch_backend', 'system')
|
|
||||||
|
|
||||||
# Proxy ID "key"
|
|
||||||
preferred_proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=self.watch.get('uuid'))
|
|
||||||
|
|
||||||
# Pluggable content self.fetcher
|
|
||||||
if not prefer_fetch_backend or prefer_fetch_backend == 'system':
|
|
||||||
prefer_fetch_backend = self.datastore.data['settings']['application'].get('fetch_backend')
|
|
||||||
|
|
||||||
# In the case that the preferred fetcher was a browser config with custom connection URL..
|
|
||||||
# @todo - on save watch, if its extra_browser_ then it should be obvious it will use playwright (like if its requests now..)
|
|
||||||
browser_connection_url = None
|
|
||||||
if prefer_fetch_backend.startswith('extra_browser_'):
|
|
||||||
(t, key) = prefer_fetch_backend.split('extra_browser_')
|
|
||||||
connection = list(
|
|
||||||
filter(lambda s: (s['browser_name'] == key), self.datastore.data['settings']['requests'].get('extra_browsers', [])))
|
|
||||||
if connection:
|
|
||||||
prefer_fetch_backend = 'base_html_playwright'
|
|
||||||
browser_connection_url = connection[0].get('browser_connection_url')
|
|
||||||
|
|
||||||
|
|
||||||
# Grab the right kind of 'fetcher', (playwright, requests, etc)
|
# Grab the right kind of 'fetcher', (playwright, requests, etc)
|
||||||
if hasattr(content_fetcher, prefer_fetch_backend):
|
if hasattr(content_fetcher, self.prefer_fetch_backend):
|
||||||
fetcher_obj = getattr(content_fetcher, prefer_fetch_backend)
|
fetcher_obj = getattr(content_fetcher, self.prefer_fetch_backend)
|
||||||
else:
|
else:
|
||||||
# If the klass doesnt exist, just use a default
|
# If the klass doesnt exist, just use a default
|
||||||
fetcher_obj = getattr(content_fetcher, "html_requests")
|
fetcher_obj = getattr(content_fetcher, "html_requests")
|
||||||
|
|
||||||
|
# Proxy ID "key"
|
||||||
|
preferred_proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=self.watch.get('uuid'))
|
||||||
proxy_url = None
|
proxy_url = None
|
||||||
if preferred_proxy_id:
|
if preferred_proxy_id:
|
||||||
proxy_url = self.datastore.proxy_list.get(preferred_proxy_id).get('url')
|
proxy_url = self.datastore.proxy_list.get(preferred_proxy_id).get('url')
|
||||||
@@ -69,9 +51,23 @@ class difference_detection_processor():
|
|||||||
# Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need.
|
# Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need.
|
||||||
# When browser_connection_url is None, it method should default to working out whats the best defaults (os env vars etc)
|
# When browser_connection_url is None, it method should default to working out whats the best defaults (os env vars etc)
|
||||||
self.fetcher = fetcher_obj(proxy_override=proxy_url,
|
self.fetcher = fetcher_obj(proxy_override=proxy_url,
|
||||||
browser_connection_url=browser_connection_url
|
browser_connection_url=None # Default, let each fetcher work it out
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def fetch_content(self):
|
||||||
|
|
||||||
|
url = self.watch.link
|
||||||
|
|
||||||
|
# In the case that the preferred fetcher was a browser config with custom connection URL..
|
||||||
|
# @todo - on save watch, if its extra_browser_ then it should be obvious it will use playwright (like if its requests now..)
|
||||||
|
if self.prefer_fetch_backend.startswith('extra_browser_'):
|
||||||
|
(t, key) = self.prefer_fetch_backend.split('extra_browser_')
|
||||||
|
connection = list(
|
||||||
|
filter(lambda s: (s['browser_name'] == key), self.datastore.data['settings']['requests'].get('extra_browsers', [])))
|
||||||
|
if connection:
|
||||||
|
prefer_fetch_backend = 'base_html_playwright'
|
||||||
|
browser_connection_url = connection[0].get('browser_connection_url')
|
||||||
|
|
||||||
if self.watch.has_browser_steps:
|
if self.watch.has_browser_steps:
|
||||||
self.fetcher.browser_steps = self.watch.get('browser_steps', [])
|
self.fetcher.browser_steps = self.watch.get('browser_steps', [])
|
||||||
self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid'))
|
self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid'))
|
||||||
@@ -115,14 +111,6 @@ class difference_detection_processor():
|
|||||||
|
|
||||||
# After init, call run_changedetection() which will do the actual change-detection
|
# After init, call run_changedetection() which will do the actual change-detection
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def run_changedetection(self, uuid, skip_when_checksum_same=True):
|
|
||||||
update_obj = {'last_notification_error': False, 'last_error': False}
|
|
||||||
some_data = 'xxxxx'
|
|
||||||
update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
|
|
||||||
changed_detected = False
|
|
||||||
return changed_detected, update_obj, ''.encode('utf-8')
|
|
||||||
|
|
||||||
|
|
||||||
def available_processors():
|
def available_processors():
|
||||||
from . import restock_diff, text_json_diff
|
from . import restock_diff, text_json_diff
|
||||||
|
|||||||
@@ -1,8 +1,9 @@
|
|||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import urllib3
|
import urllib3
|
||||||
from . import difference_detection_processor
|
#from . import browser_content_difference_detection_processor
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
|
from . import text_content_difference_detection_processor
|
||||||
|
|
||||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||||
|
|
||||||
@@ -15,7 +16,7 @@ class UnableToExtractRestockData(Exception):
|
|||||||
self.status_code = status_code
|
self.status_code = status_code
|
||||||
return
|
return
|
||||||
|
|
||||||
class perform_site_check(difference_detection_processor):
|
class perform_site_check(text_content_difference_detection_processor):
|
||||||
screenshot = None
|
screenshot = None
|
||||||
xpath_data = None
|
xpath_data = None
|
||||||
|
|
||||||
|
|||||||
@@ -10,8 +10,8 @@ import urllib3
|
|||||||
from changedetectionio import content_fetcher, html_tools
|
from changedetectionio import content_fetcher, html_tools
|
||||||
from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
|
from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
from . import difference_detection_processor
|
|
||||||
from ..html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text
|
from ..html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text
|
||||||
|
from . import text_content_difference_detection_processor
|
||||||
|
|
||||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||||
|
|
||||||
@@ -31,7 +31,7 @@ class PDFToHTMLToolNotFound(ValueError):
|
|||||||
|
|
||||||
# Some common stuff here that can be moved to a base class
|
# Some common stuff here that can be moved to a base class
|
||||||
# (set_proxy_from_list)
|
# (set_proxy_from_list)
|
||||||
class perform_site_check(difference_detection_processor):
|
class perform_site_check(text_content_difference_detection_processor):
|
||||||
|
|
||||||
def run_changedetection(self, uuid, skip_when_checksum_same=True):
|
def run_changedetection(self, uuid, skip_when_checksum_same=True):
|
||||||
changed_detected = False
|
changed_detected = False
|
||||||
|
|||||||
@@ -1,9 +1,13 @@
|
|||||||
|
import importlib
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import threading
|
import threading
|
||||||
import queue
|
import queue
|
||||||
import time
|
import time
|
||||||
|
from distutils.util import strtobool
|
||||||
|
|
||||||
from changedetectionio import content_fetcher, html_tools
|
from changedetectionio import content_fetcher, html_tools
|
||||||
|
|
||||||
from .processors.text_json_diff import FilterNotFoundInResponse
|
from .processors.text_json_diff import FilterNotFoundInResponse
|
||||||
from .processors.restock_diff import UnableToExtractRestockData
|
from .processors.restock_diff import UnableToExtractRestockData
|
||||||
|
|
||||||
@@ -15,6 +19,7 @@ from .processors.restock_diff import UnableToExtractRestockData
|
|||||||
import logging
|
import logging
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
|
||||||
class update_worker(threading.Thread):
|
class update_worker(threading.Thread):
|
||||||
current_uuid = None
|
current_uuid = None
|
||||||
|
|
||||||
@@ -24,6 +29,7 @@ class update_worker(threading.Thread):
|
|||||||
self.app = app
|
self.app = app
|
||||||
self.notification_q = notification_q
|
self.notification_q = notification_q
|
||||||
self.datastore = datastore
|
self.datastore = datastore
|
||||||
|
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
|
|
||||||
def queue_notification_for_watch(self, n_object, watch):
|
def queue_notification_for_watch(self, n_object, watch):
|
||||||
@@ -209,7 +215,7 @@ class update_worker(threading.Thread):
|
|||||||
from .processors import text_json_diff, restock_diff
|
from .processors import text_json_diff, restock_diff
|
||||||
|
|
||||||
while not self.app.config.exit.is_set():
|
while not self.app.config.exit.is_set():
|
||||||
update_handler = None
|
change_processor = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
queued_item_data = self.q.get(block=False)
|
queued_item_data = self.q.get(block=False)
|
||||||
@@ -230,34 +236,45 @@ class update_worker(threading.Thread):
|
|||||||
now = time.time()
|
now = time.time()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Processor is what we are using for detecting the "Change"
|
# Protect against file:// access
|
||||||
processor = self.datastore.data['watching'][uuid].get('processor', 'text_json_diff')
|
if re.search(r'^file://', self.datastore.data['watching'][uuid].get('url', '').strip(), re.IGNORECASE):
|
||||||
# if system...
|
if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
|
||||||
|
raise Exception(
|
||||||
# Abort processing when the content was the same as the last fetch
|
"file:// type access is denied for security reasons."
|
||||||
skip_when_same_checksum = queued_item_data.item.get('skip_when_checksum_same')
|
|
||||||
|
|
||||||
|
|
||||||
# @todo some way to switch by name
|
|
||||||
# Init a new 'difference_detection_processor'
|
|
||||||
|
|
||||||
if processor == 'restock_diff':
|
|
||||||
update_handler = restock_diff.perform_site_check(datastore=self.datastore,
|
|
||||||
watch_uuid=uuid
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
prefer_fetch_backend = self.datastore.data['watching'][uuid].get('fetch_backend', 'system')
|
||||||
|
if not prefer_fetch_backend or prefer_fetch_backend == 'system':
|
||||||
|
prefer_fetch_backend = self.datastore.data['settings']['application'].get('fetch_backend')
|
||||||
|
|
||||||
|
processor = self.datastore.data['watching'][uuid].get('processor', 'text_json_diff')
|
||||||
|
|
||||||
|
processor = 'cdio_whois_diff'
|
||||||
|
|
||||||
|
if processor in ['text_json_diff', 'restock_diff']:
|
||||||
|
base_processor_module = f"changedetectionio.processors.{processor}"
|
||||||
else:
|
else:
|
||||||
# Used as a default and also by some tests
|
# Each plugin is one processor exactly
|
||||||
update_handler = text_json_diff.perform_site_check(datastore=self.datastore,
|
base_processor_module = f"{processor}.processor"
|
||||||
watch_uuid=uuid
|
|
||||||
|
# its correct that processor dictates which fethcer it uses i think
|
||||||
|
|
||||||
|
# these should inherit the right fetcher too
|
||||||
|
module = importlib.import_module(base_processor_module)
|
||||||
|
change_processor = getattr(module, 'perform_site_check')
|
||||||
|
change_processor = change_processor(datastore=self.datastore,
|
||||||
|
watch_uuid=uuid,
|
||||||
|
prefer_fetch_backend=prefer_fetch_backend
|
||||||
)
|
)
|
||||||
|
|
||||||
# Clear last errors (move to preflight func?)
|
# Clear last errors (move to preflight func?)
|
||||||
self.datastore.data['watching'][uuid]['browser_steps_last_error_step'] = None
|
self.datastore.data['watching'][uuid]['browser_steps_last_error_step'] = None
|
||||||
|
|
||||||
update_handler.call_browser()
|
skip_when_same_checksum = queued_item_data.item.get('skip_when_checksum_same')
|
||||||
|
# Each processor extends base class of the kind of fetcher it needs to run anyway
|
||||||
changed_detected, update_obj, contents = update_handler.run_changedetection(uuid,
|
change_processor.fetch_content()
|
||||||
skip_when_checksum_same=skip_when_same_checksum,
|
changed_detected, update_obj, contents = change_processor.run_changedetection(uuid,
|
||||||
|
skip_when_checksum_same=skip_when_same_checksum
|
||||||
)
|
)
|
||||||
|
|
||||||
# Re #342
|
# Re #342
|
||||||
@@ -465,10 +482,10 @@ class update_worker(threading.Thread):
|
|||||||
})
|
})
|
||||||
|
|
||||||
# Always save the screenshot if it's available
|
# Always save the screenshot if it's available
|
||||||
if update_handler.screenshot:
|
if change_processor.screenshot:
|
||||||
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=update_handler.screenshot)
|
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=change_processor.screenshot)
|
||||||
if update_handler.xpath_data:
|
if change_processor.xpath_data:
|
||||||
self.datastore.save_xpath_data(watch_uuid=uuid, data=update_handler.xpath_data)
|
self.datastore.save_xpath_data(watch_uuid=uuid, data=change_processor.xpath_data)
|
||||||
|
|
||||||
|
|
||||||
self.current_uuid = None # Done
|
self.current_uuid = None # Done
|
||||||
|
|||||||
Reference in New Issue
Block a user