Mirror of https://github.com/dgtlmoon/changedetection.io.git
Synced 2025-11-01 07:08:47 +00:00

Compare commits: 22 commits (source-htm ... python-sli)
Commits in this comparison:

d40773d595, a4c620c308, 9434eac72d, edb5e20de6, e62eeb1c4a, a4e6fd1ec3,
d8b9f0fd78, f9387522ee, ba8d2e0c2d, 247db22a33, aeabd5b3fc, e9e1ce893f,
b5a415c7b6, 9e954532d6, 955835df72, 1aeafef910, 1367197df7, 143971123d,
04d2d3fb00, 236f0c098d, 582c6b465b, a021ba87fa
.github/workflows/test-only.yml (vendored): 5 lines changed

@@ -55,6 +55,11 @@ jobs:
       # Playwright/Browserless fetch
       docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py && pytest tests/visualselector/test_fetch_data.py'

+      # Settings headers playwright tests - Call back in from Browserless, check headers
+      docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'
+      docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'
+      docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "USE_EXPERIMENTAL_PUPPETEER_FETCH=yes" -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'
+
       # restock detection via playwright - added name=changedet here so that playwright/browserless can connect to it
       docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py'
Dockerfile

@@ -1,5 +1,5 @@
 # pip dependencies install stage
-FROM python:3.10-slim as builder
+FROM python:3.10-slim-bookworm as builder

 # See `cryptography` pin comment in requirements.txt
 ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1

@@ -29,10 +29,10 @@ RUN pip install --target=/dependencies playwright~=1.27.1 \
     || echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."

 # Final image stage
-FROM python:3.10-slim
+FROM python:3.10-slim-bookworm

 RUN apt-get update && apt-get install -y --no-install-recommends \
-    libssl1.1 \
+    libssl3 \
     libxslt1.1 \
     # For pdftohtml
     poppler-utils \
@@ -2,10 +2,10 @@

 Live your data-life pro-actively, track website content changes and receive notifications via Discord, Email, Slack, Telegram and 70+ more

-[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring" title="Self-hosted web page change monitoring" />](https://lemonade.changedetection.io/start?src=pip)
+[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring" title="Self-hosted web page change monitoring" />](https://changedetection.io)

-[**Don't have time? Let us host it for you! try our extremely affordable subscription use our proxies and support!**](https://lemonade.changedetection.io/start)
+[**Don't have time? Let us host it for you! try our extremely affordable subscription use our proxies and support!**](https://changedetection.io)

 #### Example use cases
README.md: 15 lines changed

@@ -5,13 +5,13 @@
 _Live your data-life pro-actively._

-[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring" title="Self-hosted web page change monitoring" />](https://lemonade.changedetection.io/start?src=github)
+[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring" title="Self-hosted web page change monitoring" />](https://changedetection.io?src=github)

 [![Release Version][release-shield]][release-link] [![Docker Pulls][docker-pulls]][docker-link] [![License][license-shield]](LICENSE.md)

 

-[**Don't have time? Let us host it for you! try our $8.99/month subscription - use our proxies and support!**](https://lemonade.changedetection.io/start) , _half the price of other website change monitoring services and comes with unlimited watches & checks!_
+[**Don't have time? Let us host it for you! try our $8.99/month subscription - use our proxies and support!**](https://changedetection.io) , _half the price of other website change monitoring services!_

 - Chrome browser included.
 - Super fast, no registration needed setup.

@@ -22,11 +22,11 @@ _Live your data-life pro-actively._

 Available when connected to a <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Playwright-content-fetcher">playwright content fetcher</a> (included as part of our subscription service)

-[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/visualselector-anim.gif" style="max-width:100%;" alt="Self-hosted web page change monitoring context difference " title="Self-hosted web page change monitoring context difference " />](https://lemonade.changedetection.io/start?src=github)
+[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/visualselector-anim.gif" style="max-width:100%;" alt="Self-hosted web page change monitoring context difference " title="Self-hosted web page change monitoring context difference " />](https://changedetection.io?src=github)

 ### Easily see what changed, examine by word, line, or individual character.

-[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot-diff.png" style="max-width:100%;" alt="Self-hosted web page change monitoring context difference " title="Self-hosted web page change monitoring context difference " />](https://lemonade.changedetection.io/start?src=github)
+[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot-diff.png" style="max-width:100%;" alt="Self-hosted web page change monitoring context difference " title="Self-hosted web page change monitoring context difference " />](https://changedetection.io?src=github)


 ### Perform interactive browser steps

@@ -35,7 +35,7 @@ Fill in text boxes, click buttons and more, setup your changedetection scenario.

 Using the **Browser Steps** configuration, add basic steps before performing change detection, such as logging into websites, adding a product to a cart, accept cookie logins, entering dates and refining searches.

-[<img src="docs/browsersteps-anim.gif" style="max-width:100%;" alt="Self-hosted web page change monitoring context difference " title="Website change detection with interactive browser steps, login, cookies etc" />](https://lemonade.changedetection.io/start?src=github)
+[<img src="docs/browsersteps-anim.gif" style="max-width:100%;" alt="Self-hosted web page change monitoring context difference " title="Website change detection with interactive browser steps, login, cookies etc" />](https://changedetection.io?src=github)

 After **Browser Steps** have been run, then visit the **Visual Selector** tab to refine the content you're interested in.
 Requires Playwright to be enabled.

@@ -66,6 +66,7 @@ Requires Playwright to be enabled.
 - Proactively search for jobs, get notified when companies update their careers page, search job portals for keywords.
 - Get alerts when new job positions are open on Bamboo HR and other job platforms
 - Website defacement monitoring
+- Pokémon Card Restock Tracker / Pokémon TCG Tracker

 _Need an actual Chrome runner with Javascript support? We support fetching via WebDriver and Playwright!</a>_

@@ -144,7 +145,7 @@ See the wiki for more information https://github.com/dgtlmoon/changedetection.io
 ## Filters

 XPath, JSONPath, jq, and CSS support comes baked in! You can be as specific as you need, use XPath exported from various XPath element query creation tools.
-(We support LXML `re:test`, `re:math` and `re:replace`.)
+(We support LXML `re:test`, `re:match` and `re:replace`.)

 ## Notifications

@@ -237,7 +238,7 @@ Supports managing the website watch list [via our API](https://changedetection.i
 Do you use changedetection.io to make money? does it save you time or money? Does it make your life easier? less stressful? Remember, we write this software when we should be doing actual paid work, we have to buy food and pay rent just like you.

-Firstly, consider taking out a [change detection monthly subscription - unlimited checks and watches](https://lemonade.changedetection.io/start) , even if you don't use it, you still get the warm fuzzy feeling of helping out the project. (And who knows, you might just use it!)
+Firstly, consider taking out a [change detection monthly subscription - unlimited checks and watches](https://changedetection.io?src=github) , even if you don't use it, you still get the warm fuzzy feeling of helping out the project. (And who knows, you might just use it!)

 Or directly donate an amount PayPal [](https://www.paypal.com/donate/?hosted_button_id=7CP6HR9ZCNDYJ)
changedetectionio/__init__.py

@@ -38,7 +38,7 @@ from flask_paginate import Pagination, get_page_parameter
 from changedetectionio import html_tools
 from changedetectionio.api import api_v1

-__version__ = '0.41.1'
+__version__ = '0.42.3'

 datastore = None
@@ -124,6 +124,15 @@ def _jinja2_filter_datetimestamp(timestamp, format="%Y-%m-%d %H:%M:%S"):
     return timeago.format(timestamp, time.time())


+@app.template_filter('pagination_slice')
+def _jinja2_filter_pagination_slice(arr, skip):
+    per_page = datastore.data['settings']['application'].get('pager_size', 50)
+    if per_page:
+        return arr[skip:skip + per_page]
+
+    return arr
+
+
 @app.template_filter('format_seconds_ago')
 def _jinja2_filter_seconds_precise(timestamp):
     if timestamp == False:
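A minimal sketch of what the new `pagination_slice` filter does, standalone and with a hard-coded page size in place of the datastore lookup:

```python
# Hypothetical standalone version of the Jinja2 filter added above.
def pagination_slice(arr, skip, per_page=50):
    # A per_page of 0 (pager disabled) returns the whole list unchanged.
    if per_page:
        return arr[skip:skip + per_page]
    return arr

watches = list(range(120))
assert pagination_slice(watches, skip=0) == list(range(50))      # page 1
assert pagination_slice(watches, skip=50)[0] == 50               # page 2
assert pagination_slice(watches, skip=0, per_page=0) == watches  # disabled
```

In the templates it is applied to the sorted watch list, e.g. `watches|sort(...)|pagination_slice(skip=pagination.skip)`, which is exactly what the watch-overview.html hunk further down switches to.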
@@ -403,27 +412,40 @@ def changedetection_app(config=None, datastore_o=None):

         # Sort by last_changed and add the uuid which is usually the key..
         sorted_watches = []
+        search_q = request.args.get('q').strip().lower() if request.args.get('q') else False
         for uuid, watch in datastore.data['watching'].items():

-            if limit_tag != None:
+            if limit_tag:
                 # Support for comma separated list of tags.
-                if watch['tag'] is None:
+                if not watch.get('tag'):
                     continue
-                for tag_in_watch in watch['tag'].split(','):
+                for tag_in_watch in watch.get('tag', '').split(','):
                     tag_in_watch = tag_in_watch.strip()
                     if tag_in_watch == limit_tag:
                         watch['uuid'] = uuid
-                        sorted_watches.append(watch)
+                        if search_q:
+                            if (watch.get('title') and search_q in watch.get('title').lower()) or search_q in watch.get('url', '').lower():
+                                sorted_watches.append(watch)
+                        else:
+                            sorted_watches.append(watch)

             else:
                 watch['uuid'] = uuid
-                sorted_watches.append(watch)
+                #watch['uuid'] = uuid
+                if search_q:
+                    if (watch.get('title') and search_q in watch.get('title').lower()) or search_q in watch.get('url', '').lower():
+                        sorted_watches.append(watch)
+                else:
+                    sorted_watches.append(watch)

         existing_tags = datastore.get_all_tags()
         form = forms.quickWatchForm(request.form)
         page = request.args.get(get_page_parameter(), type=int, default=1)
-        total_count = len(sorted_watches) if sorted_watches else len(datastore.data['watching'])
-        pagination = Pagination(page=page, total=total_count, per_page=int(os.getenv('pagination_per_page', 50)), css_framework = "semantic")
+        total_count = len(sorted_watches)
+
+        pagination = Pagination(page=page,
+                                total=total_count,
+                                per_page=datastore.data['settings']['application'].get('pager_size', 50), css_framework="semantic")

         output = render_template(
             "watch-overview.html",
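The search match itself is a simple case-insensitive substring test against the watch title and URL. A standalone sketch of that predicate (hypothetical helper name, not from the codebase):

```python
def watch_matches(watch: dict, search_q: str) -> bool:
    """Case-insensitive substring match against a watch's title or URL."""
    q = search_q.strip().lower()
    title = (watch.get('title') or '').lower()
    url = watch.get('url', '').lower()
    return bool(q) and (q in title or q in url)

assert watch_matches({'title': 'My Shop', 'url': 'https://example.com'}, 'shop')
assert watch_matches({'title': None, 'url': 'https://example.com/page'}, 'EXAMPLE')
assert not watch_matches({'title': None, 'url': 'https://example.com'}, 'missing')
```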
@@ -437,6 +459,7 @@ def changedetection_app(config=None, datastore_o=None):
             hosted_sticky=os.getenv("SALTED_PASS", False) == False,
             pagination=pagination,
             queued_uuids=[q_uuid.item['uuid'] for q_uuid in update_q.queue],
+            search_q=request.args.get('q','').strip(),
             sort_attribute=request.args.get('sort') if request.args.get('sort') else request.cookies.get('sort'),
             sort_order=request.args.get('order') if request.args.get('order') else request.cookies.get('order'),
             system_default_fetcher=datastore.data['settings']['application'].get('fetch_backend'),

@@ -690,6 +713,7 @@ def changedetection_app(config=None, datastore_o=None):
             form=form,
             has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
             has_empty_checktime=using_default_check_time,
+            has_extra_headers_file=watch.has_extra_headers_file or datastore.has_extra_headers_file,
             is_html_webdriver=is_html_webdriver,
             jq_support=jq_support,
             playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False),

@@ -1434,6 +1458,7 @@ def check_for_new_version():
         # Check daily
         app.config.exit.wait(86400)

+
 def notification_runner():
     global notification_debug_log
     from datetime import datetime
changedetectionio/content_fetcher.py

@@ -147,6 +147,13 @@ class Fetcher():
     def is_ready(self):
         return True

+    def get_all_headers(self):
+        """
+        Get all headers but ensure all keys are lowercase
+        :return:
+        """
+        return {k.lower(): v for k, v in self.headers.items()}
+
     def iterate_browser_steps(self):
         from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface
         from playwright._impl._api_types import TimeoutError
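The point of lowercasing is that HTTP header names are case-insensitive, while a plain Python dict lookup is not. A minimal sketch of the failure mode this method avoids (standalone, not the fetcher class itself):

```python
# Headers as a proxy or browser driver might report them, with unusual casing.
headers = {'CONTENT-TYPE': 'application/json', 'Server': 'nginx'}

# A naive lookup misses the value entirely.
assert headers.get('Content-Type', '') == ''

# Normalising the keys once makes every later lookup reliable.
normalised = {k.lower(): v for k, v in headers.items()}
assert normalised.get('content-type') == 'application/json'
```

The processor hunks further down switch every `fetcher.headers.get('Content-Type', ...)` call over to `fetcher.get_all_headers().get('content-type', ...)` for exactly this reason.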
@@ -384,6 +391,7 @@ class base_html_playwright(Fetcher):
             self.headers = x.get('headers')
+            self.instock_data = x.get('instock_data')
             self.screenshot = base64.b64decode(x.get('screenshot'))
             self.status_code = x.get('status_code')
             self.xpath_data = x.get('xpath_data')

         else:
changedetectionio/forms.py

@@ -481,6 +481,10 @@ class globalSettingsApplicationForm(commonSettingsForm):
     global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
     ignore_whitespace = BooleanField('Ignore whitespace')
     password = SaltyPasswordField()
+    pager_size = IntegerField('Pager size',
+                              render_kw={"style": "width: 5em;"},
+                              validators=[validators.NumberRange(min=0,
+                                                                 message="Should be atleast zero (disabled)")])
     removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
     render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
     shared_diff_access = BooleanField('Allow access to view diff page when password is enabled', default=False, validators=[validators.Optional()])
changedetectionio/html_tools.py

@@ -137,12 +137,13 @@ def _get_stripped_text_from_json_match(match):
 def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None):
     stripped_text_from_html = False

-    # Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded <script type=ldjson>
+    # Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded within HTML tags
     try:
         stripped_text_from_html = _parse_json(json.loads(content), json_filter)
     except json.JSONDecodeError:

         # Foreach <script json></script> blob.. just return the first that matches json_filter
+        # As a last resort, try to parse the whole <body>
         s = []
         soup = BeautifulSoup(content, 'html.parser')

@@ -150,32 +151,34 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
             bs_result = soup.findAll('script', {"type": "application/ld+json"})
         else:
             bs_result = soup.findAll('script')
+            bs_result += soup.findAll('body')

-        if not bs_result:
-            raise JSONNotFound("No parsable JSON found in this document")
-
+        bs_jsons = []
         for result in bs_result:
             # Skip empty tags, and things that dont even look like JSON
-            if not result.string or not '{' in result.string:
+            if not result.text or '{' not in result.text:
                 continue

             try:
-                json_data = json.loads(result.string)
+                json_data = json.loads(result.text)
+                bs_jsons.append(json_data)
             except json.JSONDecodeError:
-                # Just skip it
+                # Skip objects which cannot be parsed
                 continue
-            else:
-                stripped_text_from_html = _parse_json(json_data, json_filter)
-                if ensure_is_ldjson_info_type:
-                    # Could sometimes be list, string or something else random
-                    if isinstance(json_data, dict):
-                        # If it has LD JSON 'key' @type, and @type is 'product', and something was found for the search
-                        # (Some sites have multiple of the same ld+json @type='product', but some have the review part, some have the 'price' part)
-                        if json_data.get('@type', False) and json_data.get('@type','').lower() == ensure_is_ldjson_info_type.lower() and stripped_text_from_html:
-                            break
-                elif stripped_text_from_html:
-                    break
+
+        if not bs_jsons:
+            raise JSONNotFound("No parsable JSON found in this document")
+
+        for json_data in bs_jsons:
+            stripped_text_from_html = _parse_json(json_data, json_filter)
+            if ensure_is_ldjson_info_type:
+                # Could sometimes be list, string or something else random
+                if isinstance(json_data, dict):
+                    # If it has LD JSON 'key' @type, and @type is 'product', and something was found for the search
+                    # (Some sites have multiple of the same ld+json @type='product', but some have the review part, some have the 'price' part)
+                    if json_data.get('@type', False) and json_data.get('@type','').lower() == ensure_is_ldjson_info_type.lower() and stripped_text_from_html:
+                        break
+            elif stripped_text_from_html:
+                break

     if not stripped_text_from_html:
         # Re 265 - Just return an empty string when filter not found
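The practical effect: JSON that a proxy or headless browser has wrapped in an HTML shell (the situation described in PR #1593) can still be filtered. A quick sketch of the recovered behaviour, assuming the package is importable as `changedetectionio`:

```python
from changedetectionio import html_tools

# JSON wrapped in HTML, e.g. what Browserless/Puppeteer can return for a JSON URL.
content = '<html><body>{"hello": 123, "world": 456}</body></html>'

# Previously this raised JSONNotFound because only <script> tags were scanned;
# with the <body> fallback the filter now resolves.
print(html_tools.extract_json_as_string(content, "json:$.world"))  # -> 456
```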
changedetectionio/importer.py

@@ -52,7 +52,8 @@ class import_url_list(Importer):

             # Flask wtform validators wont work with basic auth, use validators package
             # Up to 5000 per batch so we dont flood the server
-            if len(url) and validators.url(url.replace('source:', '')) and good < 5000:
+            # @todo validators.url failed on local hostnames (such as referring to ourself when using browserless)
+            if len(url) and 'http' in url.lower() and good < 5000:
                 extras = None
                 if processor:
                     extras = {'processor': processor}
changedetectionio/model/App.py

@@ -23,25 +23,26 @@ class model(dict):
             'workers': int(getenv("DEFAULT_SETTINGS_REQUESTS_WORKERS", "10")),  # Number of threads, lower is better for slow connections
         },
         'application': {
-            # Custom notification content
             'api_access_token_enabled': True,
-            'password': False,
             'base_url' : None,
-            'extract_title_as_title': False,
             'empty_pages_are_a_change': False,
+            'extract_title_as_title': False,
             'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"),
             'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT,
             'global_ignore_text': [],  # List of text to ignore when calculating the comparison checksum
             'global_subtractive_selectors': [],
             'ignore_whitespace': True,
-            'render_anchor_tag_content': False,
-            'notification_urls': [],  # Apprise URL list
+            # Custom notification content
-            'notification_title': default_notification_title,
             'notification_body': default_notification_body,
             'notification_format': default_notification_format,
+            'notification_title': default_notification_title,
+            'notification_urls': [],  # Apprise URL list
+            'pager_size': 50,
+            'password': False,
+            'render_anchor_tag_content': False,
             'schema_version' : 0,
             'shared_diff_access': False,
-            'webdriver_delay': None  # Extra delay in seconds before extracting text
+            'webdriver_delay': None , # Extra delay in seconds before extracting text
         }
     }
 }

@@ -49,3 +50,15 @@ class model(dict):
     def __init__(self, *arg, **kw):
         super(model, self).__init__(*arg, **kw)
         self.update(self.base_config)
+
+
+def parse_headers_from_text_file(filepath):
+    headers = {}
+    with open(filepath, 'r') as f:
+        for l in f.readlines():
+            l = l.strip()
+            if not l.startswith('#') and ':' in l:
+                (k, v) = l.split(':')
+                headers[k.strip()] = v.strip()
+
+    return headers
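The expected file format is therefore one `Name: value` pair per line, with `#` starting a comment line. A small usage sketch (hypothetical temporary file, not a path the app uses):

```python
import os
import tempfile
from changedetectionio.model.App import parse_headers_from_text_file

with tempfile.NamedTemporaryFile('w', suffix='.txt', delete=False) as f:
    f.write("# Comment lines are ignored\n")
    f.write("Cookie: foo=bar\n")
    f.write("User-Agent: wonderbra 1.0\n")
    path = f.name

print(parse_headers_from_text_file(path))
# {'Cookie': 'foo=bar', 'User-Agent': 'wonderbra 1.0'}
os.unlink(path)
```

One caveat worth knowing: `l.split(':')` splits on every colon, so a value that itself contains a colon (a URL, for instance) produces more than two parts and the two-name unpack raises ValueError; the parser as written assumes simple `key: value` lines.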
changedetectionio/model/Watch.py

@@ -473,6 +473,40 @@ class model(dict):
         # None is set
         return False

+    @property
+    def has_extra_headers_file(self):
+        if os.path.isfile(os.path.join(self.watch_data_dir, 'headers.txt')):
+            return True
+
+        for f in self.all_tags:
+            fname = "headers-" + re.sub(r'[\W_]', '', f).lower().strip() + ".txt"
+            filepath = os.path.join(self.__datastore_path, fname)
+            if os.path.isfile(filepath):
+                return True
+
+        return False
+
+    def get_all_headers(self):
+        from .App import parse_headers_from_text_file
+        headers = self.get('headers', {}).copy()
+        # Available headers on the disk could 'headers.txt' in the watch data dir
+        filepath = os.path.join(self.watch_data_dir, 'headers.txt')
+        try:
+            if os.path.isfile(filepath):
+                headers.update(parse_headers_from_text_file(filepath))
+        except Exception as e:
+            print(f"ERROR reading headers.txt at {filepath}", str(e))
+
+        # Or each by tag, as tagname.txt in the main datadir
+        for f in self.all_tags:
+            fname = "headers-" + re.sub(r'[\W_]', '', f).lower().strip() + ".txt"
+            filepath = os.path.join(self.__datastore_path, fname)
+            try:
+                if os.path.isfile(filepath):
+                    headers.update(parse_headers_from_text_file(filepath))
+            except Exception as e:
+                print(f"ERROR reading headers.txt at {filepath}", str(e))
+        return headers
+
     def get_last_fetched_before_filters(self):
         import brotli
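The per-tag file name is derived by stripping every non-alphanumeric character from the tag and lowercasing it, so one tag maps to one predictable file. A sketch of just that naming rule:

```python
import re

def headers_filename_for_tag(tag: str) -> str:
    # Mirrors the fname expression above: strip non-alphanumerics, lowercase.
    return "headers-" + re.sub(r'[\W_]', '', tag).lower().strip() + ".txt"

assert headers_filename_for_tag("testtag") == "headers-testtag.txt"
assert headers_filename_for_tag("My Tag!") == "headers-mytag.txt"
```

This matches the `headers-testtag.txt` fixture used by `test_headers_textfile_in_request` further down.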
changedetectionio/processors/restock_diff.py

@@ -12,6 +12,12 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 name = 'Re-stock detection for single product pages'
 description = 'Detects if the product goes back to in-stock'

+class UnableToExtractRestockData(Exception):
+    def __init__(self, status_code):
+        # Set this so we can use it in other parts of the app
+        self.status_code = status_code
+        return
+
 class perform_site_check(difference_detection_processor):
     screenshot = None
     xpath_data = None

@@ -105,7 +111,8 @@ class perform_site_check(difference_detection_processor):
             fetched_md5 = hashlib.md5(fetcher.instock_data.encode('utf-8')).hexdigest()
             # 'Possibly in stock' comes from stock-not-in-stock.js when no string found above the fold.
             update_obj["in_stock"] = True if fetcher.instock_data == 'Possibly in stock' else False
-
+        else:
+            raise UnableToExtractRestockData(status_code=fetcher.status_code)

         # The main thing that all this at the moment comes down to :)
         changed_detected = False
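This gives callers a typed failure that still carries the HTTP status, which is how the update_worker.py hunk near the end of this comparison turns it into a watch error message. A minimal sketch of the pattern (stand-in helper, not the real processor):

```python
class UnableToExtractRestockData(Exception):
    def __init__(self, status_code):
        # Keep the upstream HTTP status so the error message can include it.
        self.status_code = status_code

def check_stock(instock_data, status_code):
    # Hypothetical stand-in for the processor logic above.
    if instock_data:
        return instock_data == 'Possibly in stock'
    raise UnableToExtractRestockData(status_code=status_code)

try:
    check_stock(instock_data=None, status_code=503)
except UnableToExtractRestockData as e:
    print(f"Unable to extract restock data (Got code {e.status_code} from server)")
```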
changedetectionio/processors/text_json_diff.py

@@ -54,7 +54,6 @@ class perform_site_check(difference_detection_processor):
         changed_detected = False
         screenshot = False  # as bytes
         stripped_text_from_html = ""
-        source_filter = None  # A machine filter that can be applied to source: (url|filter)

         # DeepCopy so we can be sure we don't accidently change anything by reference
         watch = deepcopy(self.datastore.data['watching'].get(uuid))

@@ -71,10 +70,9 @@ class perform_site_check(difference_detection_processor):
         # Unset any existing notification error
         update_obj = {'last_notification_error': False, 'last_error': False}

-        extra_headers = watch.get('headers', [])

-        # Tweak the base config with the per-watch ones
-        request_headers = deepcopy(self.datastore.data['settings']['headers'])
+        extra_headers = watch.get_all_headers()
+        request_headers = self.datastore.get_all_headers()
         request_headers.update(extra_headers)

         # https://github.com/psf/requests/issues/4525

@@ -141,7 +139,7 @@ class perform_site_check(difference_detection_processor):
         self.xpath_data = fetcher.xpath_data

         # Track the content type
-        update_obj['content_type'] = fetcher.headers.get('Content-Type', '')
+        update_obj['content_type'] = fetcher.get_all_headers().get('content-type', '').lower()

         # Watches added automatically in the queue manager will skip if its the same checksum as the previous run
         # Saves a lot of CPU

@@ -161,7 +159,7 @@ class perform_site_check(difference_detection_processor):
         # https://stackoverflow.com/questions/41817578/basic-method-chaining ?
         # return content().textfilter().jsonextract().checksumcompare() ?

-        is_json = 'application/json' in fetcher.headers.get('Content-Type', '')
+        is_json = 'application/json' in fetcher.get_all_headers().get('content-type', '').lower()
         is_html = not is_json

         # source: support, basically treat it as plaintext

@@ -169,7 +167,7 @@ class perform_site_check(difference_detection_processor):
             is_html = False
             is_json = False

-        if watch.is_pdf or 'application/pdf' in fetcher.headers.get('Content-Type', '').lower():
+        if watch.is_pdf or 'application/pdf' in fetcher.get_all_headers().get('content-type', '').lower():
             from shutil import which
             tool = os.getenv("PDF_TO_HTML_TOOL", "pdftohtml")
             if not which(tool):

@@ -237,7 +235,7 @@ class perform_site_check(difference_detection_processor):
             html_content = fetcher.content

             # If not JSON, and if it's not text/plain..
-            if 'text/plain' in fetcher.headers.get('Content-Type', '').lower():
+            if 'text/plain' in fetcher.get_all_headers().get('content-type', '').lower():
                 # Don't run get_text or xpath/css filters on plaintext
                 stripped_text_from_html = html_content
             else:
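Every content-type test in the processor now goes through the lowercased header map, so detection no longer depends on how the upstream server or browser driver capitalised the header. A sketch of the before/after difference (standalone, with a stand-in fetcher):

```python
class FakeFetcher:
    # Headers as returned by a driver that upper-cases names.
    headers = {'CONTENT-TYPE': 'aPPlication/JSon'}

    def get_all_headers(self):
        return {k.lower(): v for k, v in self.headers.items()}

fetcher = FakeFetcher()

# Old style: misses both the key casing and the value casing.
old_is_json = 'application/json' in fetcher.headers.get('Content-Type', '')
# New style: key normalised by get_all_headers(), value by .lower().
new_is_json = 'application/json' in fetcher.get_all_headers().get('content-type', '').lower()

assert not old_is_json and new_is_json
```

The `aPPlication/JSon` casing here is the same deliberately weird value used by `test_correct_header_detect` below.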
@@ -250,11 +248,6 @@ class perform_site_check(difference_detection_processor):

                 for filter_rule in include_filters_rule:
                     # For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
-                    if '|' in filter_rule:
-                        filter_rule, source_filter = filter_rule.split('|')
-                    else:
-                        source_filter = None
-
                     if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):
                         html_content += html_tools.xpath_filter(xpath_filter=filter_rule.replace('xpath:', ''),
                                                                 html_content=fetcher.content,

@@ -264,10 +257,6 @@ class perform_site_check(difference_detection_processor):
                         html_content += html_tools.include_filters(include_filters=filter_rule,
                                                                    html_content=fetcher.content,
                                                                    append_pretty_line_formatting=not is_source)
-                    if source_filter == 'pretty':
-                        from bs4 import BeautifulSoup
-                        html_content = BeautifulSoup(html_content, 'html.parser').prettify()
-

                 if not html_content.strip():
                     raise FilterNotFoundInResponse(include_filters_rule)
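A side note on the removed `filter|modifier` syntax: '|' is also XPath's union operator, so splitting every rule on '|' could mangle legitimate filters. A sketch of the collision, with a hypothetical filter string (an illustration of the hazard, not a documented reason for the removal):

```python
filter_rule = "//div[@id='price'] | //div[@id='stock']"  # a valid XPath union

# The removed code split unconditionally on '|':
filter_rule, source_filter = filter_rule.split('|')
# filter_rule is now "//div[@id='price'] " and source_filter " //div[@id='stock']",
# so half of the union is silently reinterpreted as a "source filter" name.
# With two or more '|' characters the two-name unpack raises ValueError outright.
```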
stock-not-in-stock.js

@@ -10,6 +10,7 @@ function isItemInStock() {
     'brak na stanie',
     'brak w magazynie',
     'coming soon',
+    'currently have any tickets for this',
     'currently unavailable',
     'en rupture de stock',
     'item is no longer available',

@@ -20,7 +21,9 @@ function isItemInStock() {
     'nicht zur verfügung',
     'no disponible temporalmente',
     'no longer in stock',
+    'no tickets available',
     'not available',
+    'not currently available',
     'not in stock',
     'notify me when available',
     'não estamos a aceitar encomendas',

@@ -30,6 +33,8 @@ function isItemInStock() {
     'sold out',
     'temporarily out of stock',
     'temporarily unavailable',
+    'tickets unavailable',
+    'unavailable tickets',
     'we do not currently have an estimate of when this product will be back in stock.',
     'zur zeit nicht an lager',
   ];
toggle-theme.js

@@ -3,7 +3,7 @@
  * Toggles theme between light and dark mode.
  */
 $(document).ready(function () {
-  const button = document.getElementsByClassName("toggle-theme")[0];
+  const button = document.getElementById("toggle-light-mode");

   button.onclick = () => {
     const htmlElement = document.getElementsByTagName("html");

@@ -21,4 +21,33 @@ $(document).ready(function () {
   const setCookieValue = (value) => {
     document.cookie = `css_dark_mode=${value};max-age=31536000;path=/`
   }
+
+  // Search input box behaviour
+  const toggle_search = document.getElementById("toggle-search");
+  const search_q = document.getElementById("search-q");
+  window.addEventListener('keydown', function (e) {
+
+    if (e.altKey == true && e.keyCode == 83)
+      search_q.classList.toggle('expanded');
+      search_q.focus();
+  });
+
+
+  search_q.onkeydown = (e) => {
+    var key = e.keyCode || e.which;
+    if (key === 13) {
+      document.searchForm.submit();
+    }
+  };
+  toggle_search.onclick = () => {
+    // Could be that they want to search something once text is in there
+    if (search_q.value.length) {
+      document.searchForm.submit();
+    } else {
+      // If not..
+      search_q.classList.toggle('expanded');
+      search_q.focus();
+    }
+  };
+
 });
SCSS source:

@@ -54,8 +54,47 @@ a.github-link {
   }
 }

-button.toggle-theme {
-  width: 4rem;
+#toggle-light-mode {
+  width: 3rem;
+  .icon-dark {
+    display: none;
+  }
+
+  &.dark {
+    .icon-light {
+      display: none;
+    }
+
+    .icon-dark {
+      display: block;
+    }
+  }
+}
+
+#toggle-search {
+  width: 2rem;
+}
+
+#search-q {
+  opacity: 0;
+  -webkit-transition: all .9s ease;
+  -moz-transition: all .9s ease;
+  transition: all .9s ease;
+  width: 0;
+  display: none;
+  &.expanded {
+    width: auto;
+    display: inline-block;
+
+    opacity: 1;
+  }
+}
+#search-result-info {
+  color: #fff;
+}
+
+button.toggle-button {
   vertical-align: middle;
   background: transparent;
   border: none;
   cursor: pointer;

@@ -74,19 +113,7 @@ button.toggle-theme {
     display: block;
   }

-  .icon-dark {
-    display: none;
-  }
-
-  &.dark {
-    .icon-light {
-      display: none;
-    }
-
-    .icon-dark {
-      display: block;
-    }
-  }
 }

 .pure-menu-horizontal {
Compiled CSS:

@@ -331,23 +331,44 @@ a.github-link {
 a.github-link:hover {
   color: var(--color-icon-github-hover); }

-button.toggle-theme {
-  width: 4rem;
+#toggle-light-mode {
+  width: 3rem; }
+  #toggle-light-mode .icon-dark {
+    display: none; }
+  #toggle-light-mode.dark .icon-light {
+    display: none; }
+  #toggle-light-mode.dark .icon-dark {
+    display: block; }
+
+#toggle-search {
+  width: 2rem; }
+
+#search-q {
+  opacity: 0;
+  -webkit-transition: all .9s ease;
+  -moz-transition: all .9s ease;
+  transition: all .9s ease;
+  width: 0;
+  display: none; }
+  #search-q.expanded {
+    width: auto;
+    display: inline-block;
+    opacity: 1; }
+
+#search-result-info {
+  color: #fff; }
+
+button.toggle-button {
   vertical-align: middle;
   background: transparent;
   border: none;
   cursor: pointer;
   color: var(--color-icon-github); }
-button.toggle-theme:hover {
+button.toggle-button:hover {
   color: var(--color-icon-github-hover); }
-button.toggle-theme svg {
+button.toggle-button svg {
   fill: currentColor; }
-button.toggle-theme .icon-light {
-  display: block; }
-button.toggle-theme .icon-dark {
-  display: none; }
-button.toggle-theme.dark .icon-light {
-  display: none; }
-button.toggle-theme.dark .icon-dark {
+button.toggle-button .icon-light {
   display: block; }

 .pure-menu-horizontal {
changedetectionio/store.py

@@ -3,7 +3,7 @@ from flask import (
 )

 from . model import App, Watch
-from copy import deepcopy
+from copy import deepcopy, copy
 from os import path, unlink
 from threading import Lock
 import json

@@ -204,15 +204,16 @@ class ChangeDetectionStore:
                 # GitHub #30 also delete history records
                 for uuid in self.data['watching']:
                     path = pathlib.Path(os.path.join(self.datastore_path, uuid))
-                    shutil.rmtree(path)
-                    self.needs_write_urgent = True
+                    if os.path.exists(path):
+                        shutil.rmtree(path)

             else:
                 path = pathlib.Path(os.path.join(self.datastore_path, uuid))
-                shutil.rmtree(path)
+                if os.path.exists(path):
+                    shutil.rmtree(path)
                 del self.data['watching'][uuid]

-            self.needs_write_urgent = True
+        self.needs_write_urgent = True

 # Clone a watch by UUID
 def clone(self, uuid):

@@ -366,19 +367,21 @@ class ChangeDetectionStore:
     def save_error_text(self, watch_uuid, contents):
         if not self.data['watching'].get(watch_uuid):
             return
-        target_path = os.path.join(self.datastore_path, watch_uuid, "last-error.txt")

+        self.data['watching'][watch_uuid].ensure_data_dir_exists()
+        target_path = os.path.join(self.datastore_path, watch_uuid, "last-error.txt")
         with open(target_path, 'w') as f:
             f.write(contents)

     def save_xpath_data(self, watch_uuid, data, as_error=False):

         if not self.data['watching'].get(watch_uuid):
             return
         if as_error:
             target_path = os.path.join(self.datastore_path, watch_uuid, "elements-error.json")
         else:
             target_path = os.path.join(self.datastore_path, watch_uuid, "elements.json")

+        self.data['watching'][watch_uuid].ensure_data_dir_exists()
         with open(target_path, 'w') as f:
             f.write(json.dumps(data))
             f.close()

@@ -472,8 +475,6 @@ class ChangeDetectionStore:
         return proxy_list if len(proxy_list) else None

-
-
     def get_preferred_proxy_for_watch(self, uuid):
         """
         Returns the preferred proxy by ID key

@@ -505,6 +506,25 @@ class ChangeDetectionStore:

         return None

+    @property
+    def has_extra_headers_file(self):
+        filepath = os.path.join(self.datastore_path, 'headers.txt')
+        return os.path.isfile(filepath)
+
+    def get_all_headers(self):
+        from .model.App import parse_headers_from_text_file
+        headers = copy(self.data['settings'].get('headers', {}))
+
+        filepath = os.path.join(self.datastore_path, 'headers.txt')
+        try:
+            if os.path.isfile(filepath):
+                headers.update(parse_headers_from_text_file(filepath))
+        except Exception as e:
+            print(f"ERROR reading headers.txt at {filepath}", str(e))
+
+        return headers
+
+
     # Run all updates
     # IMPORTANT - Each update could be run even when they have a new install and the schema is correct
     # So therefor - each `update_n` should be very careful about checking if it needs to actually run
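Putting the store-level and watch-level halves together, the request headers for a check are assembled in layers, later layers overriding earlier ones. A sketch of the precedence with stand-in dicts rather than the real datastore/watch objects:

```python
# 1. Global settings UI headers, then the datastore-wide headers.txt.
request_headers = {'user-agent': 'global-agent', 'x-global': 'from settings'}
request_headers.update({'x-global': 'from datastore headers.txt'})

# 2. Per-watch headers, plus the watch's own headers.txt and per-tag files.
extra_headers = {'user-agent': 'watch-specific-agent', 'tag-header': 'test'}

# 3. Watch-level values win, mirroring request_headers.update(extra_headers)
#    in the text_json_diff processor above.
request_headers.update(extra_headers)
assert request_headers['user-agent'] == 'watch-specific-agent'
```

This layering is what `test_headers_textfile_in_request` below exercises: the global file, the per-tag file and the per-watch file all end up echoed back in a single request.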
@@ -115,7 +115,7 @@
     URLs generated by changedetection.io (such as <code>{{ '{{diff_url}}' }}</code>) require the <code>BASE_URL</code> environment variable set.<br>
     Your <code>BASE_URL</code> var is currently "{{settings_application['current_base_url']}}"
     <br>
-    Warning: Contents of <code>{{ '{{diff}}' }}</code>, <code>{{ '{{diff_removed}}' }}</code>, and <code>{{ '{{diff_added}}' }}</code> depend on how the difference algorithm perceives the change. For example, an addition or removal could be perceived as a change in some cases. <a target="_new" href="https://github.com/dgtlmoon/changedetection.io/wiki/Using-the-%7B%7Bdiff%7D%7D,-%7B%7Bdiff_added%7D%7D,-and-%7B%7Bdiff_removal%7D%7D-notification-tokens">More Here</a> <br>
+    Warning: Contents of <code>{{ '{{diff}}' }}</code>, <code>{{ '{{diff_removed}}' }}</code>, and <code>{{ '{{diff_added}}' }}</code> depend on how the difference algorithm perceives the change. For example, an addition or removal could be perceived as a change in some cases. <a target="_new" href="https://github.com/dgtlmoon/changedetection.io/wiki/Using-the-%7B%7Bdiff%7D%7D,-%7B%7Bdiff_added%7D%7D,-and-%7B%7Bdiff_removed%7D%7D-notification-tokens">More Here</a> <br>
 </div>
 </div>
 </div>
templates/base.html

@@ -82,11 +82,21 @@
             <a href="{{url_for('logout')}}" class="pure-menu-link">LOG OUT</a>
         </li>
         {% endif %}
+        <li class="pure-menu-item pure-form" id="search-menu-item">
+            <!-- We use GET here so it offers people a chance to set bookmarks etc -->
+            <form name="searchForm" action="" method="GET">
+                <input id="search-q" class="" name="q" placeholder="URL or Title {% if active_tag %}in '{{ active_tag }}'{% endif %}" required="" type="text" value="">
+                <input name="tag" type="hidden" value="{% if active_tag %}{{active_tag}}{% endif %}">
+                <button class="toggle-button " id="toggle-search" type="button" title="Search, or Use Alt+S Key" >
+                    {% include "svgs/search-icon.svg" %}
+                </button>
+            </form>
+        </li>
         <li class="pure-menu-item">
             {% if dark_mode %}
             {% set darkClass = 'dark' %}
             {% endif %}
-            <button class="toggle-theme {{darkClass}}" type="button" title="Toggle Light/Dark Mode">
+            <button class="toggle-button {{darkClass}}" id ="toggle-light-mode" type="button" title="Toggle Light/Dark Mode">
                 <span class="visually-hidden">Toggle light/dark mode</span>
                 <span class="icon-light">
                     {% include "svgs/light-mode-toggle-icon.svg" %}
templates/edit.html

@@ -152,6 +152,17 @@
                         {{ render_field(form.headers, rows=5, placeholder="Example
Cookie: foobar
User-Agent: wonderbra 1.0") }}
+
+                        <div class="pure-form-message-inline">
+                            {% if has_extra_headers_file %}
+                            <strong>Alert! Extra headers file found and will be added to this watch!</strong>
+                            {% else %}
+                            Headers can be also read from a file in your data-directory <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Adding-headers-from-an-external-file">Read more here</a>
+                            {% endif %}
+                            <br>
+                            (Not supported by Selenium browser)
+                        </div>
+
                     </div>
                     <div class="pure-control-group" id="request-body">
                         {{ render_field(form.body, rows=5, placeholder="Example
templates/settings.html

@@ -70,6 +70,10 @@
                             <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Configurable-BASE_URL-setting">read more here</a>.
                         </span>
                     </div>
+                    <div class="pure-control-group">
+                        {{ render_field(form.application.form.pager_size) }}
+                        <span class="pure-form-message-inline">Number of items per page in the watch overview list, 0 to disable.</span>
+                    </div>

                     <div class="pure-control-group">
                         {{ render_checkbox_field(form.application.form.extract_title_as_title) }}
changedetectionio/templates/svgs/search-icon.svg (new file, 1 line)

@@ -0,0 +1 @@
+<?xml version="1.0" encoding="utf-8"?><svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 122.879 119.799" enable-background="new 0 0 122.879 119.799" xml:space="preserve"><g><path d="M49.988,0h0.016v0.007C63.803,0.011,76.298,5.608,85.34,14.652c9.027,9.031,14.619,21.515,14.628,35.303h0.007v0.033v0.04 h-0.007c-0.005,5.557-0.917,10.905-2.594,15.892c-0.281,0.837-0.575,1.641-0.877,2.409v0.007c-1.446,3.66-3.315,7.12-5.547,10.307 l29.082,26.139l0.018,0.016l0.157,0.146l0.011,0.011c1.642,1.563,2.536,3.656,2.649,5.78c0.11,2.1-0.543,4.248-1.979,5.971 l-0.011,0.016l-0.175,0.203l-0.035,0.035l-0.146,0.16l-0.016,0.021c-1.565,1.642-3.654,2.534-5.78,2.646 c-2.097,0.111-4.247-0.54-5.971-1.978l-0.015-0.011l-0.204-0.175l-0.029-0.024L78.761,90.865c-0.88,0.62-1.778,1.209-2.687,1.765 c-1.233,0.755-2.51,1.466-3.813,2.115c-6.699,3.342-14.269,5.222-22.272,5.222v0.007h-0.016v-0.007 c-13.799-0.004-26.296-5.601-35.338-14.645C5.605,76.291,0.016,63.805,0.007,50.021H0v-0.033v-0.016h0.007 c0.004-13.799,5.601-26.296,14.645-35.338C23.683,5.608,36.167,0.016,49.955,0.007V0H49.988L49.988,0z M50.004,11.21v0.007h-0.016 h-0.033V11.21c-10.686,0.007-20.372,4.35-27.384,11.359C15.56,29.578,11.213,39.274,11.21,49.973h0.007v0.016v0.033H11.21 c0.007,10.686,4.347,20.367,11.359,27.381c7.009,7.012,16.705,11.359,27.403,11.361v-0.007h0.016h0.033v0.007 c10.686-0.007,20.368-4.348,27.382-11.359c7.011-7.009,11.358-16.702,11.36-27.4h-0.006v-0.016v-0.033h0.006 c-0.006-10.686-4.35-20.372-11.358-27.384C70.396,15.56,60.703,11.213,50.004,11.21L50.004,11.21z"/></g></svg>
templates/watch-overview.html

@@ -44,6 +44,7 @@
             {% if watches|length >= pagination.per_page %}
                 {{ pagination.info }}
             {% endif %}
+            {% if search_q %}<div id="search-result-info">Searching "<strong><i>{{search_q}}</i></strong>"</div>{% endif %}
             <div>
                 <a href="{{url_for('index')}}" class="pure-button button-tag {{'active' if not active_tag }}">All</a>
                 {% for tag in tags %}

@@ -73,8 +74,12 @@
             </tr>
         </thead>
         <tbody>
-            {% for watch in (watches|sort(attribute=sort_attribute, reverse=sort_order == 'asc'))[pagination.skip:pagination.skip+pagination.per_page] %}
+            {% if not watches|length %}
+            <tr>
+                <td colspan="6">No website watches configured, please add a URL in the box above, or <a href="{{ url_for('import_page')}}" >import a list</a>.</td>
+            </tr>
+            {% endif %}
+            {% for watch in (watches|sort(attribute=sort_attribute, reverse=sort_order == 'asc'))|pagination_slice(skip=pagination.skip) %}
             <tr id="{{ watch.uuid }}"
                 class="{{ loop.cycle('pure-table-odd', 'pure-table-even') }} processor-{{ watch['processor'] }}
                 {% if watch.last_error is defined and watch.last_error != False %}error{% endif %}
changedetectionio/tests/conftest.py

@@ -14,13 +14,16 @@ global app

 def cleanup(datastore_path):
     # Unlink test output files
-    files = ['output.txt',
-             'url-watches.json',
-             'secret.txt',
-             'notification.txt',
-             'count.txt',
-             'endpoint-content.txt'
-             ]
+    files = [
+        'count.txt',
+        'endpoint-content.txt'
+        'headers.txt',
+        'headers-testtag.txt',
+        'notification.txt',
+        'secret.txt',
+        'url-watches.json',
+        'output.txt',
+    ]
     for file in files:
         try:
             os.unlink("{}/{}".format(datastore_path, file))
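One observation on the new list as committed: there is no comma after `'endpoint-content.txt'`, so Python's implicit string concatenation silently merges it with the next literal into the single bogus filename `'endpoint-content.txtheaders.txt'`. The try/except around `os.unlink` hides the resulting miss, meaning neither `endpoint-content.txt` nor `headers.txt` is actually cleaned up.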
JSONPath/jq selector tests:

@@ -3,7 +3,7 @@

 import time
 from flask import url_for, escape
-from . util import live_server_setup
+from . util import live_server_setup, wait_for_all_checks
 import pytest
 jq_support = True

@@ -64,6 +64,24 @@ and it can also be repeated
     with pytest.raises(html_tools.JSONNotFound) as e_info:
         html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "jq:.id")

+
+def test_unittest_inline_extract_body():
+    content = """
+    <html>
+    <head></head>
+    <body>
+    <pre style="word-wrap: break-word; white-space: pre-wrap;">
+    {"testKey": 42}
+    </pre>
+    </body>
+    </html>
+    """
+    from .. import html_tools
+
+    # See that we can find the second <script> one, which is not broken, and matches our filter
+    text = html_tools.extract_json_as_string(content, "json:$.testKey")
+    assert text == '42'
+
 def set_original_ext_response():
     data = """
     [

@@ -436,6 +454,37 @@ def test_ignore_json_order(client, live_server):
     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
     assert b'Deleted' in res.data

+
+def test_correct_header_detect(client, live_server):
+    # Like in https://github.com/dgtlmoon/changedetection.io/pull/1593
+    # Specify extra html that JSON is sometimes wrapped in - when using Browserless/Puppeteer etc
+    with open("test-datastore/endpoint-content.txt", "w") as f:
+        f.write('<html><body>{"hello" : 123, "world": 123}')
+
+    # Add our URL to the import page
+    # Check weird casing is cleaned up and detected also
+    test_url = url_for('test_endpoint', content_type="aPPlication/JSon", uppercase_headers=True, _external=True)
+    res = client.post(
+        url_for("import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
+    wait_for_all_checks(client)
+    res = client.get(url_for("index"))
+
+    # Fixed in #1593
+    assert b'No parsable JSON found in this document' not in res.data
+
+    res = client.get(
+        url_for("preview_page", uuid="first"),
+        follow_redirects=True
+    )
+    assert b'"world":' in res.data
+    assert res.data.count(b'{') >= 2
+
+    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data
+
 def test_check_jsonpath_ext_filter(client, live_server):
     check_json_ext_filter('json:$[?(@.status==Sold)]', client, live_server)
changedetectionio/tests/test_request.py

@@ -1,7 +1,8 @@
 import json
+import os
 import time
 from flask import url_for
-from . util import set_original_response, set_modified_response, live_server_setup
+from . util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_UUID_from_client

 def test_setup(live_server):
     live_server_setup(live_server)

@@ -9,8 +10,12 @@ def test_setup(live_server):
 # Hard to just add more live server URLs when one test is already running (I think)
 # So we add our test here (was in a different file)
 def test_headers_in_request(client, live_server):
+    #live_server_setup(live_server)
     # Add our URL to the import page
     test_url = url_for('test_headers', _external=True)
+    if os.getenv('PLAYWRIGHT_DRIVER_URL'):
+        # Because its no longer calling back to localhost but from browserless, set in test-only.yml
+        test_url = test_url.replace('localhost', 'changedet')

     # Add the test URL twice, we will check
     res = client.post(

@@ -29,7 +34,7 @@ def test_headers_in_request(client, live_server):
     )
     assert b"1 Imported" in res.data

-    time.sleep(3)
+    wait_for_all_checks(client)
     cookie_header = '_ga=GA1.2.1022228332; cookie-preferences=analytics:accepted;'


@@ -39,7 +44,7 @@ def test_headers_in_request(client, live_server):
         data={
             "url": test_url,
             "tag": "",
-            "fetch_backend": "html_requests",
+            "fetch_backend": 'html_webdriver' if os.getenv('PLAYWRIGHT_DRIVER_URL') else 'html_requests',
             "headers": "xxx:ooo\ncool:yeah\r\ncookie:"+cookie_header},
         follow_redirects=True
     )

@@ -47,7 +52,7 @@ def test_headers_in_request(client, live_server):

     # Give the thread time to pick up the first version
-    time.sleep(5)
+    wait_for_all_checks(client)

     # The service should echo back the request headers
     res = client.get(

@@ -63,7 +68,7 @@ def test_headers_in_request(client, live_server):
     from html import escape
     assert escape(cookie_header).encode('utf-8') in res.data

-    time.sleep(5)
+    wait_for_all_checks(client)

     # Re #137 - Examine the JSON index file, it should have only one set of headers entered
     watches_with_headers = 0

@@ -79,6 +84,9 @@ def test_headers_in_request(client, live_server):
 def test_body_in_request(client, live_server):
     # Add our URL to the import page
     test_url = url_for('test_body', _external=True)
+    if os.getenv('PLAYWRIGHT_DRIVER_URL'):
+        # Because its no longer calling back to localhost but from browserless, set in test-only.yml
+        test_url = test_url.replace('localhost', 'cdio')

     res = client.post(
         url_for("import_page"),

@@ -167,6 +175,9 @@ def test_body_in_request(client, live_server):
 def test_method_in_request(client, live_server):
     # Add our URL to the import page
     test_url = url_for('test_method', _external=True)
+    if os.getenv('PLAYWRIGHT_DRIVER_URL'):
+        # Because its no longer calling back to localhost but from browserless, set in test-only.yml
+        test_url = test_url.replace('localhost', 'cdio')

     # Add the test URL twice, we will check
     res = client.post(

@@ -234,3 +245,76 @@ def test_method_in_request(client, live_server):
     # Should be only one with method set to PATCH
     assert watches_with_method == 1

+    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data
+
+def test_headers_textfile_in_request(client, live_server):
+    #live_server_setup(live_server)
+    # Add our URL to the import page
+    test_url = url_for('test_headers', _external=True)
+    if os.getenv('PLAYWRIGHT_DRIVER_URL'):
+        # Because its no longer calling back to localhost but from browserless, set in test-only.yml
+        test_url = test_url.replace('localhost', 'cdio')
+
+    print ("TEST URL IS ",test_url)
+    # Add the test URL twice, we will check
+    res = client.post(
+        url_for("import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
+
+    time.sleep(1)
+
+    # Add some headers to a request
+    res = client.post(
+        url_for("edit_page", uuid="first"),
+        data={
+            "url": test_url,
+            "tag": "testtag",
+            "fetch_backend": 'html_webdriver' if os.getenv('PLAYWRIGHT_DRIVER_URL') else 'html_requests',
+            "headers": "xxx:ooo\ncool:yeah\r\n"},
+        follow_redirects=True
+    )
+    assert b"Updated watch." in res.data
+    wait_for_all_checks(client)
+
+    with open('test-datastore/headers-testtag.txt', 'w') as f:
+        f.write("tag-header: test")
+
+    with open('test-datastore/headers.txt', 'w') as f:
+        f.write("global-header: nice\r\nnext-global-header: nice")
+
+    with open('test-datastore/'+extract_UUID_from_client(client)+'/headers.txt', 'w') as f:
+        f.write("watch-header: nice")
+
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+
+    # Give the thread time to pick it up
+    wait_for_all_checks(client)
+
+    res = client.get(url_for("edit_page", uuid="first"))
+    assert b"Extra headers file found and will be added to this watch" in res.data
+
+    # Not needed anymore
+    os.unlink('test-datastore/headers.txt')
+    os.unlink('test-datastore/headers-testtag.txt')
+    os.unlink('test-datastore/'+extract_UUID_from_client(client)+'/headers.txt')
+    # The service should echo back the request verb
+    res = client.get(
+        url_for("preview_page", uuid="first"),
+        follow_redirects=True
+    )
+
+    assert b"Global-Header:nice" in res.data
+    assert b"Next-Global-Header:nice" in res.data
+    assert b"Xxx:ooo" in res.data
+    assert b"Watch-Header:nice" in res.data
+    assert b"Tag-Header:test" in res.data
+
+
+    #unlink headers.txt on start/stop
+    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data
changedetectionio/tests/test_search.py (new file, 74 lines)

@@ -0,0 +1,74 @@
+from flask import url_for
+from .util import set_original_response, set_modified_response, live_server_setup
+import time
+
+def test_setup(live_server):
+    live_server_setup(live_server)
+
+def test_basic_search(client, live_server):
+    #live_server_setup(live_server)
+
+    urls = ['https://localhost:12300?first-result=1',
+            'https://localhost:5000?second-result=1'
+    ]
+    res = client.post(
+        url_for("import_page"),
+        data={"urls": "\r\n".join(urls)},
+        follow_redirects=True
+    )
+
+    assert b"2 Imported" in res.data
+
+    # By URL
+    res = client.get(url_for("index") + "?q=first-res")
+    assert urls[0].encode('utf-8') in res.data
+    assert urls[1].encode('utf-8') not in res.data
+
+    # By Title
+
+    res = client.post(
+        url_for("edit_page", uuid="first"),
+        data={"title": "xxx-title", "url": urls[0], "tag": "", "headers": "", 'fetch_backend': "html_requests"},
+        follow_redirects=True
+    )
+    assert b"Updated watch." in res.data
+
+    res = client.get(url_for("index") + "?q=xxx-title")
+    assert urls[0].encode('utf-8') in res.data
+    assert urls[1].encode('utf-8') not in res.data
+
+
+def test_search_in_tag_limit(client, live_server):
+    #live_server_setup(live_server)
+
+    urls = ['https://localhost:12300?first-result=1 tag-one',
+            'https://localhost:5000?second-result=1 tag-two'
+    ]
+    res = client.post(
+        url_for("import_page"),
+        data={"urls": "\r\n".join(urls)},
+        follow_redirects=True
+    )
+
+    assert b"2 Imported" in res.data
+
+    # By URL
+
+    res = client.get(url_for("index") + "?q=first-res")
+    # Split because of the import tag separation
+    assert urls[0].split(' ')[0].encode('utf-8') in res.data, urls[0].encode('utf-8')
+    assert urls[1].split(' ')[0].encode('utf-8') not in res.data, urls[0].encode('utf-8')
+
+    # By Title
+    res = client.post(
+        url_for("edit_page", uuid="first"),
+        data={"title": "xxx-title", "url": urls[0].split(' ')[0], "tag": urls[0].split(' ')[1], "headers": "",
+              'fetch_backend': "html_requests"},
+        follow_redirects=True
+    )
+    assert b"Updated watch." in res.data
+
+    res = client.get(url_for("index") + "?q=xxx-title")
+    assert urls[0].split(' ')[0].encode('utf-8') in res.data, urls[0].encode('utf-8')
+    assert urls[1].split(' ')[0].encode('utf-8') not in res.data, urls[0].encode('utf-8')
changedetectionio/tests/util.py

@@ -119,16 +119,26 @@ def live_server_setup(live_server):
         status_code = request.args.get('status_code')
         content = request.args.get('content') or None

+        # Used to just try to break the header detection
+        uppercase_headers = request.args.get('uppercase_headers')
+
         try:
             if content is not None:
                 resp = make_response(content, status_code)
-                resp.headers['Content-Type'] = ctype if ctype else 'text/html'
+                if uppercase_headers:
+                    ctype = ctype.upper()
+                    resp.headers['CONTENT-TYPE'] = ctype if ctype else 'text/html'
+                else:
+                    resp.headers['Content-Type'] = ctype if ctype else 'text/html'
                 return resp

             # Tried using a global var here but didn't seem to work, so reading from a file instead.
             with open("test-datastore/endpoint-content.txt", "r") as f:
                 resp = make_response(f.read(), status_code)
-                resp.headers['Content-Type'] = ctype if ctype else 'text/html'
+                if uppercase_headers:
+                    resp.headers['CONTENT-TYPE'] = ctype if ctype else 'text/html'
+                else:
+                    resp.headers['Content-Type'] = ctype if ctype else 'text/html'
                 return resp
         except FileNotFoundError:
             return make_response('', status_code)
changedetectionio/update_worker.py

@@ -5,7 +5,7 @@ import time

 from changedetectionio import content_fetcher
 from .processors.text_json_diff import FilterNotFoundInResponse
-
+from .processors.restock_diff import UnableToExtractRestockData

 # A single update worker
 #

@@ -238,7 +238,7 @@ class update_worker(threading.Thread):
                     if not self.datastore.data['watching'].get(uuid):
                         continue

-                    err_text = "Warning, no filters were found, no change detection ran."
+                    err_text = "Warning, no filters were found, no change detection ran - Did the page change layout? update your Visual Filter if necessary."
                     self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})

                     # Only when enabled, send the notification

@@ -262,6 +262,7 @@ class update_worker(threading.Thread):
                     # Yes fine, so nothing todo, don't continue to process.
                     process_changedetection_results = False
+                    changed_detected = False
                     self.datastore.update_watch(uuid=uuid, update_obj={'last_error': False})

                 except content_fetcher.BrowserStepsStepTimout as e:

@@ -318,6 +319,11 @@ class update_worker(threading.Thread):
                                                                          'last_check_status': e.status_code,
                                                                          'has_ldjson_price_data': None})
                     process_changedetection_results = False
+                except UnableToExtractRestockData as e:
+                    # Usually when fetcher.instock_data returns empty
+                    self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
+                    self.datastore.update_watch(uuid=uuid, update_obj={'last_error': f"Unable to extract restock data for this page unfortunately. (Got code {e.status_code} from server)"})
+                    process_changedetection_results = False
                 except Exception as e:
                     self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
                     self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})