mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2025-11-27 11:53:21 +00:00
Compare commits
1 Commits
0.39.13
...
toggle-fet
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
20a83ebcd2 |
@@ -40,7 +40,7 @@ from flask_wtf import CSRFProtect
|
||||
|
||||
from changedetectionio import html_tools
|
||||
|
||||
__version__ = '0.39.13'
|
||||
__version__ = '0.39.12'
|
||||
|
||||
datastore = None
|
||||
|
||||
@@ -518,31 +518,10 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
if all(value == 0 or value == None for value in datastore.data['watching'][uuid]['time_between_check'].values()):
|
||||
default['time_between_check'] = deepcopy(datastore.data['settings']['requests']['time_between_check'])
|
||||
|
||||
# Defaults for proxy choice
|
||||
if datastore.proxy_list is not None: # When enabled
|
||||
system_proxy = datastore.data['settings']['requests']['proxy']
|
||||
if default['proxy'] is None:
|
||||
default['proxy'] = system_proxy
|
||||
else:
|
||||
# Does the chosen one exist?
|
||||
if not any(default['proxy'] in tup for tup in datastore.proxy_list):
|
||||
default['proxy'] = datastore.proxy_list[0][0]
|
||||
|
||||
# Used by the form handler to keep or remove the proxy settings
|
||||
default['proxy_list'] = datastore.proxy_list
|
||||
|
||||
# proxy_override set to the json/text list of the items
|
||||
form = forms.watchForm(formdata=request.form if request.method == 'POST' else None,
|
||||
data=default,
|
||||
)
|
||||
data=default
|
||||
)
|
||||
|
||||
if datastore.proxy_list is None:
|
||||
# @todo - Couldn't get setattr() etc dynamic addition working, so remove it instead
|
||||
del form.proxy
|
||||
else:
|
||||
form.proxy.choices = datastore.proxy_list
|
||||
if default['proxy'] is None:
|
||||
form.proxy.default='http://hello'
|
||||
|
||||
if request.method == 'POST' and form.validate():
|
||||
extra_update_obj = {}
|
||||
@@ -622,28 +601,10 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
def settings_page():
|
||||
from changedetectionio import content_fetcher, forms
|
||||
|
||||
default = deepcopy(datastore.data['settings'])
|
||||
if datastore.proxy_list is not None:
|
||||
# When enabled
|
||||
system_proxy = datastore.data['settings']['requests']['proxy']
|
||||
# In the case it doesnt exist anymore
|
||||
if not any([system_proxy in tup for tup in datastore.proxy_list]):
|
||||
system_proxy = None
|
||||
|
||||
default['requests']['proxy'] = system_proxy if system_proxy is not None else datastore.proxy_list[0][0]
|
||||
# Used by the form handler to keep or remove the proxy settings
|
||||
default['proxy_list'] = datastore.proxy_list
|
||||
|
||||
|
||||
# Don't use form.data on POST so that it doesnt overrid the checkbox status from the POST status
|
||||
form = forms.globalSettingsForm(formdata=request.form if request.method == 'POST' else None,
|
||||
data=default
|
||||
data=datastore.data['settings']
|
||||
)
|
||||
if datastore.proxy_list is None:
|
||||
# @todo - Couldn't get setattr() etc dynamic addition working, so remove it instead
|
||||
del form.requests.form.proxy
|
||||
else:
|
||||
form.requests.form.proxy.choices = datastore.proxy_list
|
||||
|
||||
if request.method == 'POST':
|
||||
# Password unset is a GET, but we can lock the session to a salted env password to always need the password
|
||||
@@ -683,37 +644,44 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
@app.route("/import", methods=['GET', "POST"])
|
||||
@login_required
|
||||
def import_page():
|
||||
import validators
|
||||
remaining_urls = []
|
||||
|
||||
good = 0
|
||||
|
||||
if request.method == 'POST':
|
||||
from .importer import import_url_list, import_distill_io_json
|
||||
now=time.time()
|
||||
urls = request.values.get('urls').split("\n")
|
||||
|
||||
# URL List import
|
||||
if request.values.get('urls') and len(request.values.get('urls').strip()):
|
||||
# Import and push into the queue for immediate update check
|
||||
importer = import_url_list()
|
||||
importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore)
|
||||
for uuid in importer.new_uuids:
|
||||
update_q.put(uuid)
|
||||
if (len(urls) > 5000):
|
||||
flash("Importing 5,000 of the first URLs from your list, the rest can be imported again.")
|
||||
|
||||
if len(importer.remaining_data) == 0:
|
||||
return redirect(url_for('index'))
|
||||
else:
|
||||
remaining_urls = importer.remaining_data
|
||||
for url in urls:
|
||||
url = url.strip()
|
||||
url, *tags = url.split(" ")
|
||||
# Flask wtform validators wont work with basic auth, use validators package
|
||||
# Up to 5000 per batch so we dont flood the server
|
||||
if len(url) and validators.url(url.replace('source:', '')) and good < 5000:
|
||||
new_uuid = datastore.add_watch(url=url.strip(), tag=" ".join(tags), write_to_disk_now=False)
|
||||
if new_uuid:
|
||||
# Straight into the queue.
|
||||
update_q.put(new_uuid)
|
||||
good += 1
|
||||
continue
|
||||
|
||||
# Distill.io import
|
||||
if request.values.get('distill-io') and len(request.values.get('distill-io').strip()):
|
||||
# Import and push into the queue for immediate update check
|
||||
d_importer = import_distill_io_json()
|
||||
d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
|
||||
for uuid in d_importer.new_uuids:
|
||||
update_q.put(uuid)
|
||||
if len(url.strip()):
|
||||
remaining_urls.append(url)
|
||||
|
||||
flash("{} Imported in {:.2f}s, {} Skipped.".format(good, time.time()-now,len(remaining_urls)))
|
||||
datastore.needs_write = True
|
||||
|
||||
if len(remaining_urls) == 0:
|
||||
# Looking good, redirect to index.
|
||||
return redirect(url_for('index'))
|
||||
|
||||
# Could be some remaining, or we could be on GET
|
||||
output = render_template("import.html",
|
||||
import_url_list_remaining="\n".join(remaining_urls),
|
||||
original_distill_json=''
|
||||
remaining="\n".join(remaining_urls)
|
||||
)
|
||||
return output
|
||||
|
||||
|
||||
@@ -25,8 +25,6 @@ class Fetcher():
|
||||
# Will be needed in the future by the VisualSelector, always get this where possible.
|
||||
screenshot = False
|
||||
fetcher_description = "No description"
|
||||
system_http_proxy = os.getenv('HTTP_PROXY')
|
||||
system_https_proxy = os.getenv('HTTPS_PROXY')
|
||||
|
||||
@abstractmethod
|
||||
def get_error(self):
|
||||
@@ -82,17 +80,21 @@ class base_html_playwright(Fetcher):
|
||||
if os.getenv("PLAYWRIGHT_DRIVER_URL"):
|
||||
fetcher_description += " via '{}'".format(os.getenv("PLAYWRIGHT_DRIVER_URL"))
|
||||
|
||||
# try:
|
||||
# from playwright.sync_api import sync_playwright
|
||||
# except ModuleNotFoundError:
|
||||
# fetcher_enabled = False
|
||||
|
||||
browser_type = ''
|
||||
command_executor = ''
|
||||
|
||||
# Configs for Proxy setup
|
||||
# In the ENV vars, is prefixed with "playwright_proxy_", so it is for example "playwright_proxy_server"
|
||||
playwright_proxy_settings_mappings = ['bypass', 'server', 'username', 'password']
|
||||
playwright_proxy_settings_mappings = ['server', 'bypass', 'username', 'password']
|
||||
|
||||
proxy = None
|
||||
|
||||
def __init__(self, proxy_override=None):
|
||||
|
||||
def __init__(self):
|
||||
# .strip('"') is going to save someone a lot of time when they accidently wrap the env value
|
||||
self.browser_type = os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').strip('"')
|
||||
self.command_executor = os.getenv(
|
||||
@@ -110,10 +112,6 @@ class base_html_playwright(Fetcher):
|
||||
if proxy_args:
|
||||
self.proxy = proxy_args
|
||||
|
||||
# allow per-watch proxy selection override
|
||||
if proxy_override:
|
||||
self.proxy = {'server': proxy_override}
|
||||
|
||||
def run(self,
|
||||
url,
|
||||
timeout,
|
||||
@@ -123,8 +121,6 @@ class base_html_playwright(Fetcher):
|
||||
ignore_status_codes=False):
|
||||
|
||||
from playwright.sync_api import sync_playwright
|
||||
import playwright._impl._api_types
|
||||
from playwright._impl._api_types import Error, TimeoutError
|
||||
|
||||
with sync_playwright() as p:
|
||||
browser_type = getattr(p, self.browser_type)
|
||||
@@ -134,23 +130,16 @@ class base_html_playwright(Fetcher):
|
||||
browser = browser_type.connect_over_cdp(self.command_executor, timeout=timeout * 1000)
|
||||
|
||||
# Set user agent to prevent Cloudflare from blocking the browser
|
||||
# Use the default one configured in the App.py model that's passed from fetch_site_status.py
|
||||
context = browser.new_context(
|
||||
user_agent=request_headers['User-Agent'] if request_headers.get('User-Agent') else 'Mozilla/5.0',
|
||||
user_agent="Mozilla/5.0",
|
||||
proxy=self.proxy
|
||||
)
|
||||
page = context.new_page()
|
||||
page.set_viewport_size({"width": 1280, "height": 1024})
|
||||
try:
|
||||
response = page.goto(url, timeout=timeout * 1000, wait_until='commit')
|
||||
# Wait_until = commit
|
||||
# - `'commit'` - consider operation to be finished when network response is received and the document started loading.
|
||||
# Better to not use any smarts from Playwright and just wait an arbitrary number of seconds
|
||||
# This seemed to solve nearly all 'TimeoutErrors'
|
||||
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))
|
||||
page.wait_for_timeout(extra_wait * 1000)
|
||||
except playwright._impl._api_types.TimeoutError as e:
|
||||
raise EmptyReply(url=url, status_code=None)
|
||||
response = page.goto(url, timeout=timeout * 1000)
|
||||
|
||||
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))
|
||||
page.wait_for_timeout(extra_wait * 1000)
|
||||
|
||||
if response is None:
|
||||
raise EmptyReply(url=url, status_code=None)
|
||||
@@ -182,7 +171,7 @@ class base_html_webdriver(Fetcher):
|
||||
'socksProxy', 'socksVersion', 'socksUsername', 'socksPassword']
|
||||
proxy = None
|
||||
|
||||
def __init__(self, proxy_override=None):
|
||||
def __init__(self):
|
||||
from selenium.webdriver.common.proxy import Proxy as SeleniumProxy
|
||||
|
||||
# .strip('"') is going to save someone a lot of time when they accidently wrap the env value
|
||||
@@ -195,16 +184,6 @@ class base_html_webdriver(Fetcher):
|
||||
if v:
|
||||
proxy_args[k] = v.strip('"')
|
||||
|
||||
# Map back standard HTTP_ and HTTPS_PROXY to webDriver httpProxy/sslProxy
|
||||
if not proxy_args.get('webdriver_httpProxy') and self.system_http_proxy:
|
||||
proxy_args['httpProxy'] = self.system_http_proxy
|
||||
if not proxy_args.get('webdriver_sslProxy') and self.system_https_proxy:
|
||||
proxy_args['httpsProxy'] = self.system_https_proxy
|
||||
|
||||
# Allows override the proxy on a per-request basis
|
||||
if proxy_override is not None:
|
||||
proxy_args['httpProxy'] = proxy_override
|
||||
|
||||
if proxy_args:
|
||||
self.proxy = SeleniumProxy(raw=proxy_args)
|
||||
|
||||
@@ -272,9 +251,6 @@ class base_html_webdriver(Fetcher):
|
||||
class html_requests(Fetcher):
|
||||
fetcher_description = "Basic fast Plaintext/HTTP Client"
|
||||
|
||||
def __init__(self, proxy_override=None):
|
||||
self.proxy_override = proxy_override
|
||||
|
||||
def run(self,
|
||||
url,
|
||||
timeout,
|
||||
@@ -283,23 +259,11 @@ class html_requests(Fetcher):
|
||||
request_method,
|
||||
ignore_status_codes=False):
|
||||
|
||||
proxies={}
|
||||
|
||||
# Allows override the proxy on a per-request basis
|
||||
if self.proxy_override:
|
||||
proxies = {'http': self.proxy_override, 'https': self.proxy_override, 'ftp': self.proxy_override}
|
||||
else:
|
||||
if self.system_http_proxy:
|
||||
proxies['http'] = self.system_http_proxy
|
||||
if self.system_https_proxy:
|
||||
proxies['https'] = self.system_https_proxy
|
||||
|
||||
r = requests.request(method=request_method,
|
||||
data=request_body,
|
||||
url=url,
|
||||
headers=request_headers,
|
||||
timeout=timeout,
|
||||
proxies=proxies,
|
||||
verify=False)
|
||||
|
||||
# If the response did not tell us what encoding format to expect, Then use chardet to override what `requests` thinks.
|
||||
|
||||
@@ -16,34 +16,6 @@ class perform_site_check():
|
||||
super().__init__(*args, **kwargs)
|
||||
self.datastore = datastore
|
||||
|
||||
# If there was a proxy list enabled, figure out what proxy_args/which proxy to use
|
||||
# if watch.proxy use that
|
||||
# fetcher.proxy_override = watch.proxy or main config proxy
|
||||
# Allows override the proxy on a per-request basis
|
||||
# ALWAYS use the first one is nothing selected
|
||||
|
||||
def set_proxy_from_list(self, watch):
    """Pick the proxy to use for ``watch`` from the datastore's proxy list.

    Resolution order:

    1. the watch's own ``proxy`` value, if it appears in the proxy list;
    2. the system-wide ``settings.requests.proxy`` value, if it appears
       in the proxy list;
    3. the first column of the first entry in the proxy list.

    :param watch: mapping describing the watch; its optional ``'proxy'``
        key holds the per-watch choice.
    :return: the chosen proxy value, or ``None`` when no proxy list is
        configured (or it is empty), meaning "do not override".
    """
    proxy_list = self.datastore.proxy_list
    # An empty list is treated like "no list" so the final fallback can
    # never raise IndexError.
    if not proxy_list:
        return None

    def is_listed(candidate):
        # Entries are tuples; a candidate is valid when it matches any
        # column of any entry.
        return candidate is not None and any(candidate in entry for entry in proxy_list)

    # .get() so a watch without a 'proxy' key falls through to the
    # system default instead of raising KeyError.
    watch_proxy = watch.get('proxy')
    if is_listed(watch_proxy):
        return watch_proxy

    # Not valid (including None), try the system one
    system_proxy = self.datastore.data['settings']['requests'].get('proxy')
    if is_listed(system_proxy):
        return system_proxy

    # Fallback - did not resolve anything, use the first available
    return proxy_list[0][0]
|
||||
|
||||
def run(self, uuid):
|
||||
timestamp = int(time.time()) # used for storage etc too
|
||||
|
||||
@@ -94,10 +66,7 @@ class perform_site_check():
|
||||
# If the klass doesnt exist, just use a default
|
||||
klass = getattr(content_fetcher, "html_requests")
|
||||
|
||||
proxy_args = self.set_proxy_from_list(watch)
|
||||
fetcher = klass(proxy_override=proxy_args)
|
||||
|
||||
# Proxy List support
|
||||
fetcher = klass()
|
||||
fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_code)
|
||||
|
||||
# Fetching complete, now filters
|
||||
@@ -149,13 +118,11 @@ class perform_site_check():
|
||||
# Then we assume HTML
|
||||
if has_filter_rule:
|
||||
# For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
|
||||
if css_filter_rule[0] == '/' or css_filter_rule.startswith('xpath:'):
|
||||
html_content = html_tools.xpath_filter(xpath_filter=css_filter_rule.replace('xpath:', ''),
|
||||
html_content=fetcher.content)
|
||||
if css_filter_rule[0] == '/':
|
||||
html_content = html_tools.xpath_filter(xpath_filter=css_filter_rule, html_content=fetcher.content)
|
||||
else:
|
||||
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
|
||||
html_content = html_tools.css_filter(css_filter=css_filter_rule, html_content=fetcher.content)
|
||||
|
||||
if has_subtractive_selectors:
|
||||
html_content = html_tools.element_removal(subtractive_selectors, html_content)
|
||||
|
||||
@@ -175,6 +142,7 @@ class perform_site_check():
|
||||
# Re #340 - return the content before the 'ignore text' was applied
|
||||
text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
|
||||
|
||||
|
||||
# Re #340 - return the content before the 'ignore text' was applied
|
||||
text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
|
||||
|
||||
@@ -225,4 +193,4 @@ class perform_site_check():
|
||||
if not watch['title'] or not len(watch['title']):
|
||||
update_obj['title'] = html_tools.extract_element(find='title', html_content=fetcher.content)
|
||||
|
||||
return changed_detected, update_obj, text_content_before_ignored_filter, fetcher.screenshot
|
||||
return changed_detected, update_obj, text_content_before_ignored_filter, fetcher.screenshot
|
||||
@@ -337,9 +337,9 @@ class watchForm(commonSettingsForm):
|
||||
method = SelectField('Request method', choices=valid_method, default=default_method)
|
||||
ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False)
|
||||
trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
|
||||
|
||||
save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
|
||||
save_and_preview_button = SubmitField('Save & Preview', render_kw={"class": "pure-button pure-button-primary"})
|
||||
proxy = RadioField('Proxy')
|
||||
|
||||
def validate(self, **kwargs):
|
||||
if not super().validate():
|
||||
@@ -358,7 +358,6 @@ class watchForm(commonSettingsForm):
|
||||
# datastore.data['settings']['requests']..
|
||||
class globalSettingsRequestForm(Form):
|
||||
time_between_check = FormField(TimeBetweenCheckForm)
|
||||
proxy = RadioField('Proxy')
|
||||
|
||||
|
||||
# datastore.data['settings']['application']..
|
||||
@@ -383,3 +382,4 @@ class globalSettingsForm(Form):
|
||||
requests = FormField(globalSettingsRequestForm)
|
||||
application = FormField(globalSettingsApplicationForm)
|
||||
save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
|
||||
|
||||
|
||||
@@ -1,133 +0,0 @@
|
||||
from abc import ABC, abstractmethod
|
||||
import time
|
||||
import validators
|
||||
|
||||
|
||||
class Importer(ABC):
    """Abstract base for bulk watch importers.

    Each instance carries its own result state:

    * ``new_uuids`` - UUIDs of the watches created by ``run()``
    * ``good`` - number of entries imported successfully
    * ``remaining_data`` - entries that were not imported

    The state is created per instance in ``__init__`` rather than as
    class attributes, so lists are never shared between importers.
    """

    def __init__(self):
        self.new_uuids = []
        self.good = 0
        self.remaining_data = []

    @abstractmethod
    def run(self, data, flash, datastore):
        """Import ``data`` into ``datastore``.

        :param data: raw text submitted by the user.
        :param flash: callable used to report messages to the user.
        :param datastore: store that exposes ``add_watch()``.
        """
|
||||
|
||||
|
||||
class import_url_list(Importer):
    """
    Imports a list, can be in <code>https://example.com tag1, tag2, last tag</code> format
    """

    def run(self, data, flash, datastore):
        """Create one watch per valid line of ``data``.

        Each non-blank line is ``<url>[ <tags>]``; everything after the
        first space is passed to ``add_watch`` as the tag string.  At most
        5000 watches are created per call.  Lines that fail validation,
        exceed the batch limit, or for which ``add_watch`` returns a falsy
        value are kept - complete with their tags - in
        ``self.remaining_data`` so they can be shown again and re-submitted.

        :param data: newline separated URL list.
        :param flash: callable used to report messages to the user.
        :param datastore: store that exposes ``add_watch()``.
        """
        urls = data.split("\n")
        good = 0
        now = time.time()

        if len(urls) > 5000:
            flash("Importing 5,000 of the first URLs from your list, the rest can be imported again.")

        for line in urls:
            line = line.strip()
            if not line:
                continue

            # 'tags' should be a csv list after the URL
            url, _, tags = line.partition(" ")

            # Flask wtform validators wont work with basic auth, use validators package
            # Up to 5000 per batch so we dont flood the server
            if url and validators.url(url.replace('source:', '')) and good < 5000:
                new_uuid = datastore.add_watch(url=url.strip(), tag=tags, write_to_disk_now=False)
                if new_uuid:
                    # Straight into the queue.
                    self.new_uuids.append(new_uuid)
                    good += 1
                    continue

            # Worked past the 'continue' above, append it to the bad list.
            # Keep the whole line (not just the URL) so the tags survive a retry.
            self.remaining_data.append(line)

        self.good = good
        flash("{} Imported from list in {:.2f}s, {} Skipped.".format(good, time.time() - now, len(self.remaining_data)))
|
||||
|
||||
|
||||
class import_distill_io_json(Importer):
    """Imports watches from a Distill.io JSON export.

    Only ``name``, ``uri``, ``tags`` and the first include/exclude selector
    of ``config`` are read from each entry; everything else is ignored.
    """

    def run(self, data, flash, datastore):
        """Create one watch per usable entry of the export's ``data`` list.

        At most 5000 watches are created per call.  Entries without a
        usable ``uri``, beyond the batch limit, or for which ``add_watch``
        returns a falsy value are recorded in ``self.remaining_data`` so
        the reported "Skipped" count reflects them.  A missing or
        unparsable ``config`` only disables the selector import for that
        entry; the watch is still created.

        :param data: text of the Distill.io export (JSON).
        :param flash: callable used to report messages to the user.
        :param datastore: store that exposes ``add_watch()``.
        """
        import json

        good = 0
        now = time.time()
        self.new_uuids = []
        self.remaining_data = []

        try:
            data = json.loads(data.strip())
        except json.decoder.JSONDecodeError:
            flash("Unable to read JSON file, was it broken?", 'error')
            return

        # The root must be an object carrying a non-empty 'data' list;
        # any other shape (array, string, number) is rejected up front.
        entries = data.get('data') if isinstance(data, dict) else None
        if not entries or not isinstance(entries, list):
            flash("JSON structure looks invalid, was it broken?", 'error')
            return

        for d in entries:
            uri = d.get('uri') if isinstance(d, dict) else None
            if not isinstance(uri, str) or not uri.strip() or good >= 5000:
                self.remaining_data.append(uri if isinstance(uri, str) else '')
                continue

            try:
                d_config = json.loads(d['config'])
            except (KeyError, TypeError, json.decoder.JSONDecodeError):
                d_config = {}

            extras = {}
            if 'name' in d:
                extras['title'] = d['name']

            try:
                # @todo we only support CSS ones at the moment
                exclude = d_config['selections'][0]['frames'][0]['excludes'][0]
                if exclude['type'] == 'css':
                    extras['subtractive_selectors'] = exclude['expr']
            except (KeyError, IndexError, TypeError):
                pass

            try:
                include = d_config['selections'][0]['frames'][0]['includes'][0]
                extras['css_filter'] = include['expr']
                if include['type'] == 'xpath':
                    extras['css_filter'] = 'xpath:' + extras['css_filter']
            except (KeyError, IndexError, TypeError):
                pass

            try:
                extras['tag'] = ", ".join(d['tags'])
            except (KeyError, IndexError, TypeError):
                pass

            new_uuid = datastore.add_watch(url=uri.strip(),
                                           extras=extras,
                                           write_to_disk_now=False)

            if new_uuid:
                # Straight into the queue.
                self.new_uuids.append(new_uuid)
                good += 1
            else:
                self.remaining_data.append(uri)

        self.good = good
        flash("{} Imported from Distill.io in {:.2f}s, {} Skipped.".format(len(self.new_uuids), time.time() - now, len(self.remaining_data)))
|
||||
@@ -23,8 +23,7 @@ class model(dict):
|
||||
'requests': {
|
||||
'timeout': 15, # Default 15 seconds
|
||||
'time_between_check': {'weeks': None, 'days': None, 'hours': 3, 'minutes': None, 'seconds': None},
|
||||
'workers': 10, # Number of threads, lower is better for slow connections
|
||||
'proxy': None # Preferred proxy connection
|
||||
'workers': 10 # Number of threads, lower is better for slow connections
|
||||
},
|
||||
'application': {
|
||||
'password': False,
|
||||
|
||||
@@ -39,7 +39,6 @@ class model(dict):
|
||||
'trigger_text': [], # List of text or regex to wait for until a change is detected
|
||||
'fetch_backend': None,
|
||||
'extract_title_as_title': False,
|
||||
'proxy': None, # Preferred proxy connection
|
||||
# Re #110, so then if this is set to None, we know to use the default value instead
|
||||
# Requires setting to None on submit if it's the same as the default
|
||||
# Should be all None by default, so we use the system default in this case.
|
||||
|
||||
@@ -309,10 +309,10 @@ footer {
|
||||
font-weight: bold; }
|
||||
.pure-form textarea {
|
||||
width: 100%; }
|
||||
.pure-form .inline-radio ul {
|
||||
.pure-form ul.fetch-backend {
|
||||
margin: 0px;
|
||||
list-style: none; }
|
||||
.pure-form .inline-radio ul li > * {
|
||||
.pure-form ul.fetch-backend li > * {
|
||||
display: inline-block; }
|
||||
|
||||
@media only screen and (max-width: 760px), (min-device-width: 768px) and (max-device-width: 1024px) {
|
||||
|
||||
@@ -418,16 +418,14 @@ footer {
|
||||
textarea {
|
||||
width: 100%;
|
||||
}
|
||||
.inline-radio {
|
||||
ul {
|
||||
margin: 0px;
|
||||
list-style: none;
|
||||
li {
|
||||
> * {
|
||||
display: inline-block;
|
||||
}
|
||||
ul.fetch-backend {
|
||||
margin: 0px;
|
||||
list-style: none;
|
||||
li {
|
||||
> * {
|
||||
display: inline-block;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -33,7 +33,6 @@ class ChangeDetectionStore:
|
||||
self.needs_write = False
|
||||
self.datastore_path = datastore_path
|
||||
self.json_store_path = "{}/url-watches.json".format(self.datastore_path)
|
||||
self.proxy_list = None
|
||||
self.stop_thread = False
|
||||
|
||||
self.__data = App.model()
|
||||
@@ -112,14 +111,6 @@ class ChangeDetectionStore:
|
||||
secret = secrets.token_hex(16)
|
||||
self.__data['settings']['application']['rss_access_token'] = secret
|
||||
|
||||
|
||||
# Proxy list support - available as a selection in settings when text file is imported
|
||||
# CSV list
|
||||
# "name, address", or just "name"
|
||||
proxy_list_file = "{}/proxies.txt".format(self.datastore_path)
|
||||
if path.isfile(proxy_list_file):
|
||||
self.import_proxy_list(proxy_list_file)
|
||||
|
||||
# Bump the update version by running updates
|
||||
self.run_updates()
|
||||
|
||||
@@ -436,21 +427,6 @@ class ChangeDetectionStore:
|
||||
print ("Removing",item)
|
||||
unlink(item)
|
||||
|
||||
def import_proxy_list(self, filename):
    """Load the proxy choices from a CSV file into ``self.proxy_list``.

    Each non-blank row becomes a 2-tuple built from its first two
    columns.  When a row has only one column, or its second column is
    empty, the first column is used for both positions.  Surrounding
    whitespace is stripped from every field, and rows whose first column
    is empty are ignored.

    ``self.proxy_list`` is set to the list of tuples, or to ``None`` when
    the file yields no usable rows.

    :param filename: path of the CSV file to read.
    """
    import csv

    proxies = []
    with open(filename, newline='') as f:
        for row in csv.reader(f, skipinitialspace=True):
            fields = [field.strip() for field in row]
            # Skip blank / whitespace-only rows instead of registering
            # an empty proxy entry.
            if not fields or not fields[0]:
                continue
            first = fields[0]
            second = fields[1] if len(fields) >= 2 and fields[1] else first
            proxies.append((first, second))

    self.proxy_list = proxies or None
|
||||
|
||||
|
||||
# Run all updates
|
||||
# IMPORTANT - Each update could be run even when they have a new install and the schema is correct
|
||||
# So therefor - each `update_n` should be very careful about checking if it needs to actually run
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
{% from '_helpers.jinja' import render_field %}
|
||||
|
||||
{% macro render_common_settings_form(form, current_base_url, emailprefix) %}
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.notification_urls, rows=5, placeholder="Examples:
|
||||
Gitter - gitter://token/room
|
||||
|
||||
@@ -58,21 +58,14 @@
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="request">
|
||||
<div class="pure-control-group inline-radio">
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.fetch_backend, class="fetch-backend") }}
|
||||
<span class="pure-form-message-inline">
|
||||
<p>Use the <strong>Basic</strong> method (default) where your watched site doesn't need Javascript to render.</p>
|
||||
<p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p>
|
||||
</span>
|
||||
</div>
|
||||
{% if form.proxy %}
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_field(form.proxy, class="fetch-backend-proxy") }}
|
||||
<span class="pure-form-message-inline">
|
||||
Choose a proxy for this watch
|
||||
</span>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<fieldset class="pure-group" id="requests-override-options">
|
||||
<div class="pure-form-message-inline">
|
||||
<strong>Request override is currently only used by the <i>Basic fast Plaintext/HTTP Client</i> method.</strong>
|
||||
@@ -131,7 +124,7 @@ User-Agent: wonderbra 1.0") }}
|
||||
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
|
||||
<li>JSON - Limit text to this JSON rule, using <a href="https://pypi.org/project/jsonpath-ng/">JSONPath</a>, prefix with <code>"json:"</code>, use <code>json:$</code> to force re-formatting if required, <a
|
||||
href="https://jsonpath.com/" target="new">test your JSONPath here</a></li>
|
||||
<li>XPath - Limit text to this XPath rule, simply start with a forward-slash, example <code>//*[contains(@class, 'sametext')]</code> or <code>xpath://*[contains(@class, 'sametext')]</code>, <a
|
||||
<li>XPath - Limit text to this XPath rule, simply start with a forward-slash, example <code>//*[contains(@class, 'sametext')]</code>, <a
|
||||
href="http://xpather.com/" target="new">test your XPath here</a></li>
|
||||
</ul>
|
||||
Please be sure that you thoroughly understand how to write CSS or JSONPath, XPath selector rules before filing an issue on GitHub! <a
|
||||
|
||||
@@ -1,86 +1,30 @@
|
||||
{% extends 'base.html' %}
|
||||
|
||||
{% block content %}
|
||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
|
||||
<div class="edit-form monospaced-textarea">
|
||||
|
||||
<div class="tabs collapsable">
|
||||
<ul>
|
||||
<li class="tab" id="default-tab"><a href="#url-list">URL List</a></li>
|
||||
<li class="tab"><a href="#distill-io">Distill.io</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="box-wrap inner">
|
||||
<div class="edit-form">
|
||||
<div class="inner">
|
||||
<form class="pure-form pure-form-aligned" action="{{url_for('import_page')}}" method="POST">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
|
||||
<div class="tab-pane-inner" id="url-list">
|
||||
<fieldset class="pure-group">
|
||||
<legend>
|
||||
Enter one URL per line, and optionally add tags for each URL after a space, delineated by comma
|
||||
(,):
|
||||
<br>
|
||||
<code>https://example.com tag1, tag2, last tag</code>
|
||||
<br>
|
||||
URLs which do not pass validation will stay in the textarea.
|
||||
</legend>
|
||||
<fieldset class="pure-group">
|
||||
<legend>
|
||||
Enter one URL per line, and optionally add tags for each URL after a space, delineated by comma (,):
|
||||
<br>
|
||||
<code>https://example.com tag1, tag2, last tag</code>
|
||||
<br>
|
||||
URLs which do not pass validation will stay in the textarea.
|
||||
</legend>
|
||||
|
||||
|
||||
|
||||
<textarea name="urls" class="pure-input-1-2" placeholder="https://"
|
||||
style="width: 100%;
|
||||
<textarea name="urls" class="pure-input-1-2" placeholder="https://"
|
||||
style="width: 100%;
|
||||
font-family:monospace;
|
||||
white-space: pre;
|
||||
overflow-wrap: normal;
|
||||
overflow-x: scroll;" rows="25">{{ import_url_list_remaining }}</textarea>
|
||||
</fieldset>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="distill-io">
|
||||
|
||||
|
||||
<fieldset class="pure-group">
|
||||
<legend>
|
||||
Copy and Paste your Distill.io watch 'export' file, this should be a JSON file.</br>
|
||||
This is <i>experimental</i>, supported fields are <code>name</code>, <code>uri</code>, <code>tags</code>, <code>config:selections</code>, the rest (including <code>schedule</code>) are ignored.
|
||||
<br/>
|
||||
<p>
|
||||
How to export? <a href="https://distill.io/docs/web-monitor/how-export-and-import-monitors/">https://distill.io/docs/web-monitor/how-export-and-import-monitors/</a><br/>
|
||||
Be sure to set your default fetcher to Chrome if required.</br>
|
||||
</p>
|
||||
</legend>
|
||||
|
||||
|
||||
<textarea name="distill-io" class="pure-input-1-2" style="width: 100%;
|
||||
font-family:monospace;
|
||||
white-space: pre;
|
||||
overflow-wrap: normal;
|
||||
overflow-x: scroll;" placeholder="Example Distill.io JSON export file
|
||||
|
||||
{
|
||||
"client": {
|
||||
"local": 1
|
||||
},
|
||||
"data": [
|
||||
{
|
||||
"name": "Unraid | News",
|
||||
"uri": "https://unraid.net/blog",
|
||||
"config": "{\"selections\":[{\"frames\":[{\"index\":0,\"excludes\":[],\"includes\":[{\"type\":\"xpath\",\"expr\":\"(//div[@id='App']/div[contains(@class,'flex')]/main[contains(@class,'relative')]/section[contains(@class,'relative')]/div[@class='container']/div[contains(@class,'flex')]/div[contains(@class,'w-full')])[1]\"}]}],\"dynamic\":true,\"delay\":2}],\"ignoreEmptyText\":true,\"includeStyle\":false,\"dataAttr\":\"text\"}",
|
||||
"tags": [],
|
||||
"content_type": 2,
|
||||
"state": 40,
|
||||
"schedule": "{\"type\":\"INTERVAL\",\"params\":{\"interval\":4447}}",
|
||||
"ts": "2022-03-27T15:51:15.667Z"
|
||||
}
|
||||
]
|
||||
}
|
||||
" rows="25">{{ original_distill_json }}</textarea>
|
||||
</fieldset>
|
||||
</div>
|
||||
overflow-x: scroll;" rows="25">{{ remaining }}</textarea>
|
||||
</fieldset>
|
||||
<button type="submit" class="pure-button pure-input-1-2 pure-button-primary">Import</button>
|
||||
</form>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{% endblock %}
|
||||
|
||||
@@ -60,14 +60,7 @@
|
||||
{{ render_checkbox_field(form.application.form.real_browser_save_screenshot) }}
|
||||
<span class="pure-form-message-inline">When using a Chrome browser, a screenshot from the last check will be available on the Diff page</span>
|
||||
</div>
|
||||
{% if form.requests.proxy %}
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }}
|
||||
<span class="pure-form-message-inline">
|
||||
Choose a default proxy for all watches
|
||||
</span>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
</fieldset>
|
||||
</div>
|
||||
|
||||
@@ -80,7 +73,7 @@
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="fetching">
|
||||
<div class="pure-control-group inline-radio">
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.fetch_backend, class="fetch-backend") }}
|
||||
<span class="pure-form-message-inline">
|
||||
<p>Use the <strong>Basic</strong> method (default) where your watched sites don't need Javascript to render.</p>
|
||||
|
||||
@@ -5,17 +5,18 @@ import time
|
||||
from flask import url_for
|
||||
|
||||
from .util import live_server_setup
|
||||
def test_setup(client, live_server):
|
||||
live_server_setup(live_server)
|
||||
|
||||
|
||||
def test_import(client, live_server):
|
||||
|
||||
live_server_setup(live_server)
|
||||
|
||||
# Give the endpoint time to spin up
|
||||
time.sleep(1)
|
||||
|
||||
res = client.post(
|
||||
url_for("import_page"),
|
||||
data={
|
||||
"distill-io": "",
|
||||
"urls": """https://example.com
|
||||
https://example.com tag1
|
||||
https://example.com tag1, other tag"""
|
||||
@@ -25,96 +26,3 @@ https://example.com tag1, other tag"""
|
||||
assert b"3 Imported" in res.data
|
||||
assert b"tag1" in res.data
|
||||
assert b"other tag" in res.data
|
||||
res = client.get(url_for("api_delete", uuid="all"), follow_redirects=True)
|
||||
|
||||
# Clear flask alerts
|
||||
res = client.get( url_for("index"))
|
||||
res = client.get( url_for("index"))
|
||||
|
||||
def xtest_import_skip_url(client, live_server):
|
||||
|
||||
|
||||
# Give the endpoint time to spin up
|
||||
time.sleep(1)
|
||||
|
||||
res = client.post(
|
||||
url_for("import_page"),
|
||||
data={
|
||||
"distill-io": "",
|
||||
"urls": """https://example.com
|
||||
:ht000000broken
|
||||
"""
|
||||
},
|
||||
follow_redirects=True,
|
||||
)
|
||||
assert b"1 Imported" in res.data
|
||||
assert b"ht000000broken" in res.data
|
||||
assert b"1 Skipped" in res.data
|
||||
res = client.get(url_for("api_delete", uuid="all"), follow_redirects=True)
|
||||
# Clear flask alerts
|
||||
res = client.get( url_for("index"))
|
||||
|
||||
def test_import_distillio(client, live_server):
|
||||
|
||||
distill_data='''
|
||||
{
|
||||
"client": {
|
||||
"local": 1
|
||||
},
|
||||
"data": [
|
||||
{
|
||||
"name": "Unraid | News",
|
||||
"uri": "https://unraid.net/blog",
|
||||
"config": "{\\"selections\\":[{\\"frames\\":[{\\"index\\":0,\\"excludes\\":[],\\"includes\\":[{\\"type\\":\\"xpath\\",\\"expr\\":\\"(//div[@id='App']/div[contains(@class,'flex')]/main[contains(@class,'relative')]/section[contains(@class,'relative')]/div[@class='container']/div[contains(@class,'flex')]/div[contains(@class,'w-full')])[1]\\"}]}],\\"dynamic\\":true,\\"delay\\":2}],\\"ignoreEmptyText\\":true,\\"includeStyle\\":false,\\"dataAttr\\":\\"text\\"}",
|
||||
"tags": ["nice stuff", "nerd-news"],
|
||||
"content_type": 2,
|
||||
"state": 40,
|
||||
"schedule": "{\\"type\\":\\"INTERVAL\\",\\"params\\":{\\"interval\\":4447}}",
|
||||
"ts": "2022-03-27T15:51:15.667Z"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
'''
|
||||
|
||||
# Give the endpoint time to spin up
|
||||
time.sleep(1)
|
||||
client.get(url_for("api_delete", uuid="all"), follow_redirects=True)
|
||||
res = client.post(
|
||||
url_for("import_page"),
|
||||
data={
|
||||
"distill-io": distill_data,
|
||||
"urls" : ''
|
||||
},
|
||||
follow_redirects=True,
|
||||
)
|
||||
|
||||
|
||||
assert b"Unable to read JSON file, was it broken?" not in res.data
|
||||
assert b"1 Imported from Distill.io" in res.data
|
||||
|
||||
res = client.get( url_for("edit_page", uuid="first"))
|
||||
|
||||
assert b"https://unraid.net/blog" in res.data
|
||||
assert b"Unraid | News" in res.data
|
||||
|
||||
|
||||
# flask/wtforms should recode this, check we see it
|
||||
# wtforms encodes it like id=&#39; , but html.escape makes it like id=&#x27;
|
||||
# - so just check it manually :(
|
||||
#import json
|
||||
#import html
|
||||
#d = json.loads(distill_data)
|
||||
# embedded_d=json.loads(d['data'][0]['config'])
|
||||
# x=html.escape(embedded_d['selections'][0]['frames'][0]['includes'][0]['expr']).encode('utf-8')
|
||||
assert b"xpath:(//div[@id='App']/div[contains(@class,'flex')]/main[contains(@class,'relative')]/section[contains(@class,'relative')]/div[@class='container']/div[contains(@class,'flex')]/div[contains(@class,'w-full')])[1]" in res.data
|
||||
|
||||
# did the tags work?
|
||||
res = client.get( url_for("index"))
|
||||
|
||||
assert b"nice stuff" in res.data
|
||||
assert b"nerd-news" in res.data
|
||||
|
||||
res = client.get(url_for("api_delete", uuid="all"), follow_redirects=True)
|
||||
# Clear flask alerts
|
||||
res = client.get(url_for("index"))
|
||||
|
||||
@@ -116,46 +116,4 @@ def test_xpath_validation(client, live_server):
|
||||
data={"css_filter": "/something horrible", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"is not a valid XPath expression" in res.data
|
||||
|
||||
|
||||
# actually only really used by the distill.io importer, but could be handy too
|
||||
def test_check_with_prefix_css_filter(client, live_server):
|
||||
res = client.get(url_for("api_delete", uuid="all"), follow_redirects=True)
|
||||
assert b'Deleted' in res.data
|
||||
|
||||
# Give the endpoint time to spin up
|
||||
time.sleep(1)
|
||||
|
||||
set_original_response()
|
||||
|
||||
# Add our URL to the import page
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
res = client.post(
|
||||
url_for("import_page"),
|
||||
data={"urls": test_url},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"1 Imported" in res.data
|
||||
time.sleep(3)
|
||||
|
||||
res = client.post(
|
||||
url_for("edit_page", uuid="first"),
|
||||
data={"css_filter": "xpath://*[contains(@class, 'sametext')]", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert b"Updated watch." in res.data
|
||||
time.sleep(3)
|
||||
|
||||
res = client.get(
|
||||
url_for("preview_page", uuid="first"),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
with open('/tmp/fuck.html', 'wb') as f:
|
||||
f.write(res.data)
|
||||
assert b"Some text thats the same" in res.data #in selector
|
||||
assert b"Some text that will change" not in res.data #not in selector
|
||||
|
||||
client.get(url_for("api_delete", uuid="all"), follow_redirects=True)
|
||||
assert b"is not a valid XPath expression" in res.data
|
||||
@@ -17,14 +17,14 @@ services:
|
||||
# Alternative WebDriver/selenium URL, do not use "'s or 's!
|
||||
# - WEBDRIVER_URL=http://browser-chrome:4444/wd/hub
|
||||
#
|
||||
# WebDriver proxy settings webdriver_proxyType, webdriver_ftpProxy, webdriver_noProxy,
|
||||
# webdriver_proxyAutoconfigUrl, webdriver_autodetect,
|
||||
# WebDriver proxy settings webdriver_proxyType, webdriver_ftpProxy, webdriver_httpProxy, webdriver_noProxy,
|
||||
# webdriver_proxyAutoconfigUrl, webdriver_sslProxy, webdriver_autodetect,
|
||||
# webdriver_socksProxy, webdriver_socksUsername, webdriver_socksVersion, webdriver_socksPassword
|
||||
#
|
||||
# https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy
|
||||
#
|
||||
# Alternative Playwright URL, do not use "'s or 's!
|
||||
# - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000/
|
||||
# - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000/playwright
|
||||
#
|
||||
# Playwright proxy settings playwright_proxy_server, playwright_proxy_bypass, playwright_proxy_username, playwright_proxy_password
|
||||
#
|
||||
|
||||
Reference in New Issue
Block a user