mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2025-11-28 20:33:22 +00:00
Compare commits
16 Commits
feature/fi
...
memusage-e
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
44b2159140 | ||
|
|
3c9d2ded38 | ||
|
|
9f4364a130 | ||
|
|
5bd9eaf99d | ||
|
|
b1c51c0a65 | ||
|
|
232bd92389 | ||
|
|
e6173357a9 | ||
|
|
f2b8888aff | ||
|
|
9c46f175f9 | ||
|
|
1f27865fdf | ||
|
|
faa42d75e0 | ||
|
|
3b6e6d85bb | ||
|
|
30d6a272ce | ||
|
|
291700554e | ||
|
|
a82fad7059 | ||
|
|
c2fe5ae0d1 |
@@ -11,6 +11,8 @@ Live your data-life *pro-actively* instead of *re-actively*.
|
||||
|
||||
Free, Open-source web page monitoring, notification and change detection. Don't have time? [**Try our $6.99/month subscription - unlimited checks and watches!**](https://lemonade.changedetection.io/start)
|
||||
|
||||
[](https://discord.gg/vUNt4EtWMF) [ ](https://www.youtube.com/channel/UCbS09q1TRf0o4N2t-WA3emQ) [](https://www.linkedin.com/company/changedetection-io/)
|
||||
|
||||
|
||||
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring" title="Self-hosted web page change monitoring" />](https://lemonade.changedetection.io/start)
|
||||
|
||||
|
||||
1
changedetectionio/.gitignore
vendored
1
changedetectionio/.gitignore
vendored
@@ -1 +1,2 @@
|
||||
test-datastore
|
||||
package-lock.json
|
||||
|
||||
@@ -44,7 +44,7 @@ from flask_wtf import CSRFProtect
|
||||
from changedetectionio import html_tools
|
||||
from changedetectionio.api import api_v1
|
||||
|
||||
__version__ = '0.39.16'
|
||||
__version__ = '0.39.17'
|
||||
|
||||
datastore = None
|
||||
|
||||
@@ -580,6 +580,9 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
if request.method == 'POST' and form.validate():
|
||||
extra_update_obj = {}
|
||||
|
||||
if request.args.get('unpause_on_save'):
|
||||
extra_update_obj['paused'] = False
|
||||
|
||||
# Re #110, if they submit the same as the default value, set it to None, so we continue to follow the default
|
||||
# Assume we use the default value, unless something relevant is different, then use the form value
|
||||
# values could be None, 0 etc.
|
||||
@@ -619,7 +622,10 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
datastore.data['watching'][uuid].update(form.data)
|
||||
datastore.data['watching'][uuid].update(extra_update_obj)
|
||||
|
||||
flash("Updated watch.")
|
||||
if request.args.get('unpause_on_save'):
|
||||
flash("Updated watch - unpaused!.")
|
||||
else:
|
||||
flash("Updated watch.")
|
||||
|
||||
# Re #286 - We wait for syncing new data to disk in another thread every 60 seconds
|
||||
# But in the case something is added we should save straight away
|
||||
@@ -1063,9 +1069,9 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
except FileNotFoundError:
|
||||
abort(404)
|
||||
|
||||
@app.route("/api/add", methods=['POST'])
|
||||
@app.route("/form/add/quickwatch", methods=['POST'])
|
||||
@login_required
|
||||
def form_watch_add():
|
||||
def form_quick_watch_add():
|
||||
from changedetectionio import forms
|
||||
form = forms.quickWatchForm(request.form)
|
||||
|
||||
@@ -1078,13 +1084,19 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
flash('The URL {} already exists'.format(url), "error")
|
||||
return redirect(url_for('index'))
|
||||
|
||||
# @todo add_watch should throw a custom Exception for validation etc
|
||||
new_uuid = datastore.add_watch(url=url, tag=request.form.get('tag').strip())
|
||||
if new_uuid:
|
||||
add_paused = request.form.get('edit_and_watch_submit_button') != None
|
||||
new_uuid = datastore.add_watch(url=url, tag=request.form.get('tag').strip(), extras={'paused': add_paused})
|
||||
|
||||
|
||||
if not add_paused and new_uuid:
|
||||
# Straight into the queue.
|
||||
update_q.put(new_uuid)
|
||||
flash("Watch added.")
|
||||
|
||||
if add_paused:
|
||||
flash('Watch added in Paused state, saving will unpause.')
|
||||
return redirect(url_for('edit_page', uuid=new_uuid, unpause_on_save=1))
|
||||
|
||||
return redirect(url_for('index'))
|
||||
|
||||
|
||||
|
||||
@@ -63,12 +63,12 @@ class Fetcher():
|
||||
break;
|
||||
}
|
||||
if('' !==r.id) {
|
||||
chained_css.unshift("#"+r.id);
|
||||
final_selector= chained_css.join('>');
|
||||
chained_css.unshift("#"+CSS.escape(r.id));
|
||||
final_selector= chained_css.join(' > ');
|
||||
// Be sure theres only one, some sites have multiples of the same ID tag :-(
|
||||
if (window.document.querySelectorAll(final_selector).length ==1 ) {
|
||||
return final_selector;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
} else {
|
||||
chained_css.unshift(r.tagName.toLowerCase());
|
||||
@@ -547,6 +547,43 @@ class html_requests(Fetcher):
|
||||
self.headers = r.headers
|
||||
|
||||
|
||||
# "html_requests" is listed as the default fetcher in store.py!
|
||||
class html_fetcher_with_weird_memory_leak(Fetcher):
    """Debug fetcher that reproduces a memory-growth problem without any network I/O.

    ``run()`` reads ``memory-leak.html`` from the current working directory,
    discards the text, forces a garbage collection and then reports a fixed
    ``<html>foobar</html>`` document with status code 200.
    """

    fetcher_description = "HTTP Fetcher with unexplainable memory leak"

    def __init__(self, proxy_override=None):
        # Only stored here; nothing else in this class reads it.
        self.proxy_override = proxy_override

    def run(self,
            url,
            timeout,
            request_headers,
            request_body,
            request_method,
            ignore_status_codes=False,
            current_css_filter=None):
        """Pretend to fetch ``url``; every argument is accepted and ignored.

        Sets ``status_code``, ``content``, ``headers`` and ``xpath_data`` on the
        instance. Raises whatever ``open()`` raises when ``memory-leak.html``
        is missing.
        """
        import gc

        self.status_code = 200

        # Variants the author already tried and noted as "does nothing to help":
        #   with open('memory-leak.html', 'r', encoding="utf-8") as f:
        #   with open('memory-leak.html', 'r') as f:
        # NOTE(review): the author's note said that reading in binary mode
        # "works", yet the call below still uses text mode - confirm which
        # variant is meant to be active.
        with open('memory-leak.html', 'r') as leak_file:
            page_text = leak_file.read()

        # The text is deliberately unused: dropping it, rebinding the name and
        # collecting garbage is meant to show that none of that frees the memory.
        del page_text
        page_text = "not much"
        gc.collect()

        self.xpath_data = '{}'
        self.headers = {}
        self.content = "<html>foobar</html>"
|
||||
|
||||
# Decide which is the 'real' HTML webdriver, this is more a system wide config
|
||||
# rather than site-specific.
|
||||
use_playwright_as_chrome_fetcher = os.getenv('PLAYWRIGHT_DRIVER_URL', False)
|
||||
|
||||
@@ -11,6 +11,7 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
|
||||
# Some common stuff here that can be moved to a base class
|
||||
# (set_proxy_from_list)
|
||||
class perform_site_check():
|
||||
|
||||
def __init__(self, *args, datastore, **kwargs):
|
||||
@@ -45,6 +46,20 @@ class perform_site_check():
|
||||
|
||||
return proxy_args
|
||||
|
||||
# re.findall() has no notion of a "/pattern/flags" enclosure, so the trailing
# flags are rewritten as a leading inline-flag group, e.g. "/foobar/i" -> "(?i)foobar".
def forward_slash_enclosed_regex_to_options(self, regex):
    """Convert a user-supplied expression into a pattern with inline flags.

    ``/pattern/flags`` (at least one flag character) becomes ``(?flags)pattern``.
    Anything else is treated as a plain expression and is made case-insensitive
    by prefixing ``(?i)``.

    The flag characters are passed through unchecked, so an unsupported flag
    surfaces as ``re.error`` when the returned pattern is compiled by the caller.

    :param regex: expression as typed by the user, one per line
    :return: pattern string suitable for ``re.findall()``
    """
    enclosed = re.search(r'^/(.*?)/(\w+)$', regex)

    if enclosed:
        pattern, flags = enclosed.group(1), enclosed.group(2)
    else:
        pattern, flags = regex, 'i'

    # Global inline flags must lead the pattern: placing them anywhere else has
    # been deprecated since Python 3.6 and is an error from Python 3.11.
    return '(?{}){}'.format(flags, pattern)
|
||||
|
||||
|
||||
def run(self, uuid):
|
||||
timestamp = int(time.time()) # used for storage etc too
|
||||
|
||||
@@ -215,15 +230,27 @@ class perform_site_check():
|
||||
if len(extract_text) > 0:
|
||||
regex_matched_output = []
|
||||
for s_re in extract_text:
|
||||
result = re.findall(s_re.encode('utf8'), stripped_text_from_html,
|
||||
flags=re.MULTILINE | re.DOTALL | re.LOCALE)
|
||||
if result:
|
||||
regex_matched_output = regex_matched_output + result
|
||||
# incase they specified something in '/.../x'
|
||||
regex = self.forward_slash_enclosed_regex_to_options(s_re)
|
||||
result = re.findall(regex.encode('utf-8'), stripped_text_from_html)
|
||||
|
||||
for l in result:
|
||||
if type(l) is tuple:
|
||||
#@todo - some formatter option default (between groups)
|
||||
regex_matched_output += list(l) + [b'\n']
|
||||
else:
|
||||
# @todo - some formatter option default (between each ungrouped result)
|
||||
regex_matched_output += [l] + [b'\n']
|
||||
|
||||
# Now we will only show what the regex matched
|
||||
stripped_text_from_html = b''
|
||||
text_content_before_ignored_filter = b''
|
||||
if regex_matched_output:
|
||||
stripped_text_from_html = b'\n'.join(regex_matched_output)
|
||||
# @todo some formatter for presentation?
|
||||
stripped_text_from_html = b''.join(regex_matched_output)
|
||||
text_content_before_ignored_filter = stripped_text_from_html
|
||||
|
||||
|
||||
# Re #133 - if we should strip whitespaces from triggering the change detected comparison
|
||||
if self.datastore.data['settings']['application'].get('ignore_whitespace', False):
|
||||
fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest()
|
||||
|
||||
@@ -308,6 +308,9 @@ class ValidateCSSJSONXPATHInput(object):
|
||||
class quickWatchForm(Form):
    """Quick "add a watch" form: a URL, an optional group tag and two submit buttons."""

    # Address to watch, checked by validateURL().
    url = fields.URLField(label='URL', validators=[validateURL()])
    # Free-text group tag; may be left empty.
    tag = StringField(label='Group tag', validators=[validators.Optional()])
    # Plain submit.
    watch_submit_button = SubmitField(
        label='Watch',
        render_kw={"class": "pure-button pure-button-primary"},
    )
    # Alternative submit, rendered with the same button styling.
    edit_and_watch_submit_button = SubmitField(
        label='Edit > Watch',
        render_kw={"class": "pure-button pure-button-primary"},
    )
|
||||
|
||||
|
||||
# Common to a single watch and the global settings
|
||||
class commonSettingsForm(Form):
|
||||
|
||||
@@ -21,7 +21,7 @@ def css_filter(css_filter, html_content):
|
||||
soup = BeautifulSoup(html_content, "html.parser")
|
||||
html_block = ""
|
||||
r = soup.select(css_filter, separator="")
|
||||
if len(r) == 0:
|
||||
if len(html_content) > 0 and len(r) == 0:
|
||||
raise FilterNotFoundInResponse(css_filter)
|
||||
for item in r:
|
||||
html_block += str(item)
|
||||
@@ -49,11 +49,18 @@ def xpath_filter(xpath_filter, html_content):
|
||||
html_block = ""
|
||||
|
||||
r = tree.xpath(xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'})
|
||||
if len(r) == 0:
|
||||
raise FilterNotFoundInResponse(css_filter)
|
||||
if len(html_content) > 0 and len(r) == 0:
|
||||
raise FilterNotFoundInResponse(xpath_filter)
|
||||
|
||||
for item in r:
|
||||
html_block += etree.tostring(item, pretty_print=True).decode('utf-8') + "<br/>"
|
||||
#@note: //title/text() wont work where <title>CDATA..
|
||||
|
||||
for element in r:
|
||||
if type(element) == etree._ElementStringResult:
|
||||
html_block += str(element) + "<br/>"
|
||||
elif type(element) == etree._ElementUnicodeResult:
|
||||
html_block += str(element) + "<br/>"
|
||||
else:
|
||||
html_block += etree.tostring(element, pretty_print=True).decode('utf-8') + "<br/>"
|
||||
|
||||
return html_block
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ class model(dict):
|
||||
'base_url' : None,
|
||||
'extract_title_as_title': False,
|
||||
'empty_pages_are_a_change': False,
|
||||
'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"),
|
||||
'fetch_backend': 'html_fetcher_with_weird_memory_leak',
|
||||
'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT,
|
||||
'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum
|
||||
'global_subtractive_selectors': [],
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import os
|
||||
import uuid as uuid_builder
|
||||
from distutils.util import strtobool
|
||||
|
||||
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 60))
|
||||
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
|
||||
@@ -41,7 +42,7 @@ class model(dict):
|
||||
'trigger_text': [], # List of text or regex to wait for until a change is detected
|
||||
'text_should_not_be_present': [], # Text that should not present
|
||||
'fetch_backend': None,
|
||||
'filter_failure_notification_send': True,
|
||||
'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
|
||||
'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
|
||||
'extract_title_as_title': False,
|
||||
'check_unique_lines': False, # On change-detected, compare against all history if its something new
|
||||
@@ -171,13 +172,14 @@ class model(dict):
|
||||
|
||||
# Iterate over all history texts and see if something new exists
|
||||
def lines_contain_something_unique_compared_to_history(self, lines=()):
    """Report whether ``lines`` contains text that no stored snapshot has.

    Every line, both the incoming ones and those read from the history files,
    is decoded as UTF-8, stripped of surrounding whitespace and lower-cased
    before comparison, so differences in case or padding do not count as new.

    :param lines: iterable of ``bytes`` lines from the latest fetch
    :return: ``True`` when at least one normalised line is missing from the
        union of all history files, otherwise ``False`` (also for no lines)
    :raises OSError: when a history file cannot be opened
    :raises UnicodeDecodeError: when a line is not valid UTF-8
    """
    def _normalise(raw_line):
        return raw_line.decode('utf-8').strip().lower()

    candidate_lines = {_normalise(line) for line in lines}

    # Union of every line ever stored; each file is closed as soon as it is read.
    known_lines = set()
    for snapshot_path in self.history.values():
        with open(snapshot_path, 'rb') as snapshot:
            known_lines.update(_normalise(line) for line in snapshot)

    # Anything in the new text that is not already known means something new happened.
    return not candidate_lines.issubset(known_lines)
|
||||
|
||||
@@ -78,7 +78,7 @@ def process_notification(n_object, datastore):
|
||||
n_title = n_title[0:payload_max_size]
|
||||
n_body = n_body[0:body_limit]
|
||||
|
||||
elif url.startswith('discord://') or url.startswith('https://discordapp.com/api/webhooks'):
|
||||
elif url.startswith('discord://') or url.startswith('https://discordapp.com/api/webhooks') or url.startswith('https://discord.com/api'):
|
||||
# real limit is 2000, but minus some for extra metadata
|
||||
payload_max_size = 1700
|
||||
body_limit = max(0, payload_max_size - len(n_title))
|
||||
|
||||
@@ -1,9 +1,7 @@
|
||||
/*
|
||||
* -- BASE STYLES --
|
||||
* Most of these are inherited from Base, but I want to change a few.
|
||||
* nvm use v14.18.1
|
||||
* npm install
|
||||
* npm run build
|
||||
* nvm use v14.18.1 && npm install && npm run build
|
||||
* or npm run watch
|
||||
*/
|
||||
body {
|
||||
@@ -203,13 +201,18 @@ body:after, body:before {
|
||||
border-radius: 10px;
|
||||
margin-bottom: 1em; }
|
||||
#new-watch-form input {
|
||||
width: auto !important;
|
||||
display: inline-block; }
|
||||
display: inline-block;
|
||||
margin-bottom: 5px; }
|
||||
#new-watch-form .label {
|
||||
display: none; }
|
||||
#new-watch-form legend {
|
||||
color: #fff;
|
||||
font-weight: bold; }
|
||||
#new-watch-form #watch-add-wrapper-zone > div {
|
||||
display: inline-block; }
|
||||
@media only screen and (max-width: 760px) {
|
||||
#new-watch-form #watch-add-wrapper-zone #url {
|
||||
width: 100%; } }
|
||||
|
||||
#diff-col {
|
||||
padding-left: 40px; }
|
||||
|
||||
@@ -1,9 +1,7 @@
|
||||
/*
|
||||
* -- BASE STYLES --
|
||||
* Most of these are inherited from Base, but I want to change a few.
|
||||
* nvm use v14.18.1
|
||||
* npm install
|
||||
* npm run build
|
||||
* nvm use v14.18.1 && npm install && npm run build
|
||||
* or npm run watch
|
||||
*/
|
||||
body {
|
||||
@@ -269,8 +267,8 @@ body:after, body:before {
|
||||
border-radius: 10px;
|
||||
margin-bottom: 1em;
|
||||
input {
|
||||
width: auto !important;
|
||||
display: inline-block;
|
||||
margin-bottom: 5px;
|
||||
}
|
||||
.label {
|
||||
display: none;
|
||||
@@ -279,6 +277,17 @@ body:after, body:before {
|
||||
color: #fff;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
#watch-add-wrapper-zone {
|
||||
> div {
|
||||
display: inline-block;
|
||||
}
|
||||
@media only screen and (max-width: 760px) {
|
||||
#url {
|
||||
width: 100%;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -82,9 +82,8 @@ class ChangeDetectionStore:
|
||||
if include_default_watches:
|
||||
print("Creating JSON store at", self.datastore_path)
|
||||
|
||||
self.add_watch(url='http://www.quotationspage.com/random.php', tag='test')
|
||||
self.add_watch(url='https://news.ycombinator.com/', tag='Tech news')
|
||||
self.add_watch(url='https://changedetection.io/CHANGELOG.txt', tag='changedetection.io')
|
||||
for i in range(50):
|
||||
self.add_watch(url='https://changedetection.io/CHANGELOG.txt?x='+str(i), tag='test')
|
||||
|
||||
self.__data['version_tag'] = version_tag
|
||||
|
||||
|
||||
@@ -33,7 +33,7 @@
|
||||
|
||||
<div class="box-wrap inner">
|
||||
<form class="pure-form pure-form-stacked"
|
||||
action="{{ url_for('edit_page', uuid=uuid, next = request.args.get('next') ) }}" method="POST">
|
||||
action="{{ url_for('edit_page', uuid=uuid, next = request.args.get('next'), unpause_on_save = request.args.get('unpause_on_save')) }}" method="POST">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
|
||||
|
||||
<div class="tab-pane-inner" id="general">
|
||||
@@ -163,15 +163,26 @@ User-Agent: wonderbra 1.0") }}
|
||||
</div>
|
||||
</fieldset>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.css_filter, placeholder=".class-name or #some-id, or other CSS selector rule.",
|
||||
class="m-d") }}
|
||||
{% set field = render_field(form.css_filter,
|
||||
placeholder=".class-name or #some-id, or other CSS selector rule.",
|
||||
class="m-d")
|
||||
%}
|
||||
{{ field }}
|
||||
{% if '/text()' in field %}
|
||||
<span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the <element> contains <![CDATA[]]></strong></span><br/>
|
||||
{% endif %}
|
||||
<span class="pure-form-message-inline">
|
||||
<ul>
|
||||
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
|
||||
<li>JSON - Limit text to this JSON rule, using <a href="https://pypi.org/project/jsonpath-ng/">JSONPath</a>, prefix with <code>"json:"</code>, use <code>json:$</code> to force re-formatting if required, <a
|
||||
href="https://jsonpath.com/" target="new">test your JSONPath here</a></li>
|
||||
<li>XPath - Limit text to this XPath rule, simply start with a forward-slash, example <code>//*[contains(@class, 'sametext')]</code> or <code>xpath://*[contains(@class, 'sametext')]</code>, <a
|
||||
<li>XPath - Limit text to this XPath rule, simply start with a forward-slash,
|
||||
<ul>
|
||||
<li>Example: <code>//*[contains(@class, 'sametext')]</code> or <code>xpath://*[contains(@class, 'sametext')]</code>, <a
|
||||
href="http://xpather.com/" target="new">test your XPath here</a></li>
|
||||
<li>Example: Get all titles from an RSS feed <code>//title/text()</code></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
Please be sure that you thoroughly understand how to write CSS or JSONPath, XPath selector rules before filing an issue on GitHub! <a
|
||||
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br/>
|
||||
@@ -239,8 +250,15 @@ Unavailable") }}
|
||||
{{ render_field(form.extract_text, rows=5, placeholder="\d+ online") }}
|
||||
<span class="pure-form-message-inline">
|
||||
<ul>
|
||||
<li>Extracts text in the final output after other filters using regular expressions, for example <code>\d+ online</code></li>
|
||||
<li>One line per regular-expression.</li>
|
||||
<li>Extracts text in the final output (line by line) after other filters using regular expressions;
|
||||
<ul>
|
||||
<li>Regular expression ‐ example <code>/reports.+?2022/i</code></li>
|
||||
<li>Use <code>//(?aiLmsux))</code> type flags (more <a href="https://docs.python.org/3/library/re.html#index-15">information here</a>)<br/></li>
|
||||
<li>Keyword example ‐ example <code>Out of stock</code></li>
|
||||
<li>Use groups to extract just that text ‐ example <code>/reports.+?(\d+)/i</code> returns a list of years only</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>One line per regular-expression/ string match</li>
|
||||
</ul>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
@@ -1,18 +1,25 @@
|
||||
{% extends 'base.html' %}
|
||||
{% block content %}
|
||||
{% from '_helpers.jinja' import render_simple_field %}
|
||||
{% from '_helpers.jinja' import render_simple_field, render_field %}
|
||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
|
||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='watch-overview.js')}}" defer></script>
|
||||
|
||||
<div class="box">
|
||||
|
||||
<form class="pure-form" action="{{ url_for('form_watch_add') }}" method="POST" id="new-watch-form">
|
||||
<form class="pure-form" action="{{ url_for('form_quick_watch_add') }}" method="POST" id="new-watch-form">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
|
||||
<fieldset>
|
||||
<legend>Add a new change detection watch</legend>
|
||||
{{ render_simple_field(form.url, placeholder="https://...", required=true) }}
|
||||
{{ render_simple_field(form.tag, value=active_tag if active_tag else '', placeholder="watch group") }}
|
||||
<button type="submit" class="pure-button pure-button-primary">Watch</button>
|
||||
<div id="watch-add-wrapper-zone">
|
||||
<div>
|
||||
{{ render_simple_field(form.url, placeholder="https://...", required=true) }}
|
||||
{{ render_simple_field(form.tag, value=active_tag if active_tag else '', placeholder="watch group") }}
|
||||
</div>
|
||||
<div>
|
||||
{{ render_simple_field(form.watch_submit_button, title="Watch this URL!" ) }}
|
||||
{{ render_simple_field(form.edit_and_watch_submit_button, title="Edit first then Watch") }}
|
||||
</div>
|
||||
</div>
|
||||
</fieldset>
|
||||
<span style="color:#eee; font-size: 80%;"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread-white.svg')}}" /> Tip: You can also add 'shared' watches. <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">More info</a></a></span>
|
||||
</form>
|
||||
|
||||
@@ -15,7 +15,7 @@ def set_original_response():
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<div id="sametext">Some text thats the same</div>
|
||||
<div id="changetext">Some text that will change</div>
|
||||
<div class="changetext">Some text that will change</div>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
@@ -33,7 +33,8 @@ def set_modified_response():
|
||||
</br>
|
||||
So let's see what happens. </br>
|
||||
<div id="sametext">Some text thats the same</div>
|
||||
<div id="changetext">Some text that did change ( 1000 online <br/> 80 guests<br/> 2000 online )</div>
|
||||
<div class="changetext">Some text that did change ( 1000 online <br/> 80 guests<br/> 2000 online )</div>
|
||||
<div class="changetext">SomeCase insensitive 3456</div>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
@@ -44,11 +45,78 @@ def set_modified_response():
|
||||
return None
|
||||
|
||||
|
||||
def test_check_filter_and_regex_extract(client, live_server):
|
||||
sleep_time_for_fetch_thread = 3
|
||||
def set_multiline_response():
    """Write the multi-line HTML fixture to ``test-datastore/endpoint-content.txt``.

    The page holds a paragraph whose text is broken across several lines and a
    trailing ``<div>`` that also ends in "lines".
    """
    multiline_page = """<html>
<body>

<p>Something <br/>
across 6 billion multiple<br/>
lines
</p>

<div>aaand something lines</div>
</body>
</html>
"""

    with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
        endpoint_file.write(multiline_page)
|
||||
|
||||
|
||||
def test_setup(client, live_server):
    """Hand the live server fixture to ``live_server_setup``; asserts nothing itself."""
    live_server_setup(live_server)
|
||||
css_filter = "#changetext"
|
||||
|
||||
def test_check_filter_multiline(client, live_server):
    """A ``/.../si`` extract expression must match text spanning several lines."""
    set_multiline_response()

    # Register the test endpoint through the import page.
    test_url = url_for('test_endpoint', _external=True)
    import_response = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in import_response.data

    time.sleep(3)

    # Save the watch with no CSS filter and a single multi-line extract rule.
    edit_form = {
        "css_filter": '',
        'extract_text': '/something.+?6 billion.+?lines/si',
        "url": test_url,
        "tag": "",
        "headers": "",
        'fetch_backend': "html_requests",
    }
    edit_response = client.post(
        url_for("edit_page", uuid="first"),
        data=edit_form,
        follow_redirects=True
    )
    assert b"Updated watch." in edit_response.data

    time.sleep(3)

    preview = client.get(
        url_for("preview_page", uuid="first"),
        follow_redirects=True
    )
    page = preview.data

    # Every line covered by the match must be shown.
    assert b'<div class="">Something' in page
    assert b'<div class="">across 6 billion multiple' in page
    assert b'<div class="">lines' in page

    # The later block also ends in 'lines' but must not be captured (non-greedy match).
    assert b'aaand something lines' not in page
|
||||
|
||||
def test_check_filter_and_regex_extract(client, live_server):
|
||||
sleep_time_for_fetch_thread = 3
|
||||
css_filter = ".changetext"
|
||||
|
||||
set_original_response()
|
||||
|
||||
@@ -64,6 +132,7 @@ def test_check_filter_and_regex_extract(client, live_server):
|
||||
)
|
||||
assert b"1 Imported" in res.data
|
||||
|
||||
time.sleep(1)
|
||||
# Trigger a check
|
||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
|
||||
@@ -75,7 +144,7 @@ def test_check_filter_and_regex_extract(client, live_server):
|
||||
res = client.post(
|
||||
url_for("edit_page", uuid="first"),
|
||||
data={"css_filter": css_filter,
|
||||
'extract_text': '\d+ online\n\d+ guests',
|
||||
'extract_text': '\d+ online\r\n\d+ guests\r\n/somecase insensitive \d+/i\r\n/somecase insensitive (345\d)/i',
|
||||
"url": test_url,
|
||||
"tag": "",
|
||||
"headers": "",
|
||||
@@ -86,15 +155,6 @@ def test_check_filter_and_regex_extract(client, live_server):
|
||||
|
||||
assert b"Updated watch." in res.data
|
||||
|
||||
# Check it saved
|
||||
res = client.get(
|
||||
url_for("edit_page", uuid="first"),
|
||||
)
|
||||
assert b'\d+ online' in res.data
|
||||
|
||||
# Trigger a check
|
||||
# client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
|
||||
# Give the thread time to pick it up
|
||||
time.sleep(sleep_time_for_fetch_thread)
|
||||
|
||||
@@ -126,5 +186,13 @@ def test_check_filter_and_regex_extract(client, live_server):
|
||||
# Both regexs should be here
|
||||
assert b'<div class="">80 guests' in res.data
|
||||
|
||||
# Regex with flag handling should be here
|
||||
assert b'<div class="">SomeCase insensitive 3456' in res.data
|
||||
|
||||
# Singular group from /somecase insensitive (345\d)/i
|
||||
assert b'<div class="">3456' in res.data
|
||||
|
||||
# Regex with multiline flag handling should be here
|
||||
|
||||
# Should not be here
|
||||
assert b'Some text that did change' not in res.data
|
||||
|
||||
@@ -22,12 +22,7 @@ def set_response_with_filter():
|
||||
f.write(test_return_data)
|
||||
return None
|
||||
|
||||
|
||||
# Hard to just add more live server URLs when one test is already running (I think)
|
||||
# So we add our test here (was in a different file)
|
||||
def test_check_notification(client, live_server):
|
||||
live_server_setup(live_server)
|
||||
set_original_response()
|
||||
def run_filter_test(client, content_filter):
|
||||
|
||||
# Give the endpoint time to spin up
|
||||
time.sleep(1)
|
||||
@@ -35,7 +30,7 @@ def test_check_notification(client, live_server):
|
||||
# Add our URL to the import page
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
res = client.post(
|
||||
url_for("form_watch_add"),
|
||||
url_for("form_quick_watch_add"),
|
||||
data={"url": test_url, "tag": ''},
|
||||
follow_redirects=True
|
||||
)
|
||||
@@ -72,7 +67,7 @@ def test_check_notification(client, live_server):
|
||||
"tag": "my tag",
|
||||
"title": "my title",
|
||||
"headers": "",
|
||||
"css_filter": '#nope-doesnt-exist',
|
||||
"css_filter": content_filter,
|
||||
"fetch_backend": "html_requests"})
|
||||
|
||||
res = client.post(
|
||||
@@ -99,7 +94,7 @@ def test_check_notification(client, live_server):
|
||||
with open("test-datastore/notification.txt", 'r') as f:
|
||||
notification = f.read()
|
||||
assert 'CSS/xPath filter was not present in the page' in notification
|
||||
assert '#nope-doesnt-exist' in notification
|
||||
assert content_filter.replace('"', '\\"') in notification
|
||||
|
||||
# Remove it and prove that it doesnt trigger when not expected
|
||||
os.unlink("test-datastore/notification.txt")
|
||||
@@ -121,3 +116,19 @@ def test_check_notification(client, live_server):
|
||||
url_for("form_delete", uuid="all"),
|
||||
follow_redirects=True
|
||||
)
|
||||
os.unlink("test-datastore/notification.txt")
|
||||
|
||||
|
||||
def test_setup(live_server):
    """Hand the live server fixture to ``live_server_setup``; asserts nothing itself."""
    live_server_setup(live_server)
|
||||
|
||||
def test_check_css_filter_failure_notification(client, live_server):
    """Run the shared filter-failure scenario with a CSS selector that matches nothing."""
    missing_css_selector = '#nope-doesnt-exist'

    set_original_response()
    time.sleep(1)
    run_filter_test(client, content_filter=missing_css_selector)
|
||||
|
||||
def test_check_xpath_filter_failure_notification(client, live_server):
    """Run the shared filter-failure scenario with an XPath rule that matches nothing."""
    missing_xpath_rule = '//*[@id="nope-doesnt-exist"]'

    set_original_response()
    time.sleep(1)
    run_filter_test(client, content_filter=missing_xpath_rule)
|
||||
|
||||
|
||||
@@ -36,7 +36,7 @@ def test_check_notification(client, live_server):
|
||||
# Add our URL to the import page
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
res = client.post(
|
||||
url_for("form_watch_add"),
|
||||
url_for("form_quick_watch_add"),
|
||||
data={"url": test_url, "tag": ''},
|
||||
follow_redirects=True
|
||||
)
|
||||
@@ -172,7 +172,7 @@ def test_notification_validation(client, live_server):
|
||||
# Add our URL to the import page
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
res = client.post(
|
||||
url_for("form_watch_add"),
|
||||
url_for("form_quick_watch_add"),
|
||||
data={"url": test_url, "tag": 'nice one'},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
@@ -16,7 +16,7 @@ def test_check_notification_error_handling(client, live_server):
|
||||
# use a different URL so that it doesnt interfere with the actual check until we are ready
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
res = client.post(
|
||||
url_for("form_watch_add"),
|
||||
url_for("form_quick_watch_add"),
|
||||
data={"url": "https://changedetection.io/CHANGELOG.txt", "tag": ''},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
@@ -86,6 +86,7 @@ def test_check_xpath_filter_utf8(client, live_server):
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"1 Imported" in res.data
|
||||
time.sleep(1)
|
||||
res = client.post(
|
||||
url_for("edit_page", uuid="first"),
|
||||
data={"css_filter": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
|
||||
@@ -99,6 +100,68 @@ def test_check_xpath_filter_utf8(client, live_server):
|
||||
assert b'Deleted' in res.data
|
||||
|
||||
|
||||
# Handle utf-8 charset replies https://github.com/dgtlmoon/changedetection.io/pull/613
|
||||
def test_check_xpath_text_function_utf8(client, live_server):
    """``//item/title/text()`` must extract item titles from a UTF-8 declared RSS feed."""
    title_text_rule = '//item/title/text()'

    rss_feed = '''<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
<channel>
<title>rpilocator.com</title>
<link>https://rpilocator.com</link>
<description>Find Raspberry Pi Computers in Stock</description>
<lastBuildDate>Thu, 19 May 2022 23:27:30 GMT</lastBuildDate>
<image>
<url>https://rpilocator.com/favicon.png</url>
<title>rpilocator.com</title>
<link>https://rpilocator.com/</link>
<width>32</width>
<height>32</height>
</image>
<item>
<title>Stock Alert (UK): RPi CM4</title>
<foo>something else unrelated</foo>
</item>
<item>
<title>Stock Alert (UK): Big monitor</title>
<foo>something else unrelated</foo>
</item>
</channel>
</rss>'''

    with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
        endpoint_file.write(rss_feed)

    # Register the endpoint, served with an RSS content type, through the import page.
    test_url = url_for('test_endpoint', _external=True, content_type="application/rss+xml;charset=UTF-8")
    import_response = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in import_response.data
    time.sleep(1)

    # Apply the XPath rule to the watch.
    edit_form = {
        "css_filter": title_text_rule,
        "url": test_url,
        "tag": "",
        "headers": "",
        'fetch_backend': "html_requests",
    }
    edit_response = client.post(
        url_for("edit_page", uuid="first"),
        data=edit_form,
        follow_redirects=True
    )
    assert b"Updated watch." in edit_response.data
    time.sleep(3)

    # The overview must not show the lxml "encoding declaration" error.
    overview = client.get(url_for("index"))
    assert b'Unicode strings with encoding declaration are not supported.' not in overview.data

    # Both item titles must appear in the preview.
    preview = client.get(
        url_for("preview_page", uuid="first"),
        follow_redirects=True
    )
    assert b'<div class="">Stock Alert (UK): RPi CM4' in preview.data
    assert b'<div class="">Stock Alert (UK): Big monitor' in preview.data

    # Clean up so later tests start without watches.
    cleanup = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in cleanup.data
|
||||
|
||||
def test_check_markup_xpath_filter_restriction(client, live_server):
|
||||
sleep_time_for_fetch_thread = 3
|
||||
|
||||
@@ -113,7 +113,6 @@ class update_worker(threading.Thread):
|
||||
err_text = "Page request from server didnt respond correctly"
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code})
|
||||
|
||||
except Exception as e:
|
||||
self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
|
||||
|
||||
231
memory-leak.html
Normal file
231
memory-leak.html
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user