mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-06-14 21:01:52 +00:00
Compare commits
42 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 5b236ba6ad | |||
| e62eeb1c4a | |||
| a4e6fd1ec3 | |||
| d8b9f0fd78 | |||
| f9387522ee | |||
| ba8d2e0c2d | |||
| 247db22a33 | |||
| aeabd5b3fc | |||
| e9e1ce893f | |||
| b5a415c7b6 | |||
| 9e954532d6 | |||
| 955835df72 | |||
| 1aeafef910 | |||
| 1367197df7 | |||
| 143971123d | |||
| 04d2d3fb00 | |||
| 236f0c098d | |||
| 582c6b465b | |||
| a021ba87fa | |||
| e9057cb851 | |||
| 72ec438caa | |||
| 367dec48e1 | |||
| dd87912c88 | |||
| 0126cb0aac | |||
| 463b2d0449 | |||
| e4f6d54ae2 | |||
| 5f338d7824 | |||
| 0b563a93ec | |||
| d939882dde | |||
| 690cf4acc9 | |||
| 3cb3c7ba2e | |||
| 5325918f29 | |||
| 8eee913438 | |||
| 06921d973e | |||
| 316f28a0f2 | |||
| 3801d339f5 | |||
| d814535dc6 | |||
| cf3f3e4497 | |||
| ba76c2a280 | |||
| 94f38f052e | |||
| 1710885fc4 | |||
| 2018e73240 |
@@ -30,7 +30,7 @@ jobs:
|
|||||||
|
|
||||||
# Selenium+browserless
|
# Selenium+browserless
|
||||||
docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome-debug:3.141.59
|
docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome-debug:3.141.59
|
||||||
docker run --network changedet-network -d --hostname browserless -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm -p 3000:3000 --shm-size="2g" browserless/chrome:1.53-chrome-stable
|
docker run --network changedet-network -d --hostname browserless -e "FUNCTION_BUILT_INS=[\"fs\",\"crypto\"]" -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm -p 3000:3000 --shm-size="2g" browserless/chrome:1.53-chrome-stable
|
||||||
|
|
||||||
- name: Build changedetection.io container for testing
|
- name: Build changedetection.io container for testing
|
||||||
run: |
|
run: |
|
||||||
@@ -55,9 +55,19 @@ jobs:
|
|||||||
# Playwright/Browserless fetch
|
# Playwright/Browserless fetch
|
||||||
docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py && pytest tests/visualselector/test_fetch_data.py'
|
docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py && pytest tests/visualselector/test_fetch_data.py'
|
||||||
|
|
||||||
|
# Settings headers playwright tests - Call back in from Browserless, check headers
|
||||||
|
docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'
|
||||||
|
docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'
|
||||||
|
docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "USE_EXPERIMENTAL_PUPPETEER_FETCH=yes" -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'
|
||||||
|
|
||||||
# restock detection via playwright - added name=changedet here so that playwright/browserless can connect to it
|
# restock detection via playwright - added name=changedet here so that playwright/browserless can connect to it
|
||||||
docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py'
|
docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py'
|
||||||
|
|
||||||
|
- name: Test with puppeteer fetcher and disk cache
|
||||||
|
run: |
|
||||||
|
docker run --rm -e "PUPPETEER_DISK_CACHE=/tmp/data/" -e "USE_EXPERIMENTAL_PUPPETEER_FETCH=yes" -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py && pytest tests/visualselector/test_fetch_data.py'
|
||||||
|
# Browserless would have had -e "FUNCTION_BUILT_INS=[\"fs\",\"crypto\"]" added above
|
||||||
|
|
||||||
- name: Test proxy interaction
|
- name: Test proxy interaction
|
||||||
run: |
|
run: |
|
||||||
cd changedetectionio
|
cd changedetectionio
|
||||||
|
|||||||
+2
-2
@@ -2,10 +2,10 @@
|
|||||||
|
|
||||||
Live your data-life pro-actively, track website content changes and receive notifications via Discord, Email, Slack, Telegram and 70+ more
|
Live your data-life pro-actively, track website content changes and receive notifications via Discord, Email, Slack, Telegram and 70+ more
|
||||||
|
|
||||||
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring" title="Self-hosted web page change monitoring" />](https://lemonade.changedetection.io/start?src=pip)
|
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring" title="Self-hosted web page change monitoring" />](https://changedetection.io)
|
||||||
|
|
||||||
|
|
||||||
[**Don't have time? Let us host it for you! try our extremely affordable subscription use our proxies and support!**](https://lemonade.changedetection.io/start)
|
[**Don't have time? Let us host it for you! try our extremely affordable subscription use our proxies and support!**](https://changedetection.io)
|
||||||
|
|
||||||
|
|
||||||
#### Example use cases
|
#### Example use cases
|
||||||
|
|||||||
@@ -5,13 +5,13 @@
|
|||||||
_Live your data-life pro-actively._
|
_Live your data-life pro-actively._
|
||||||
|
|
||||||
|
|
||||||
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring" title="Self-hosted web page change monitoring" />](https://lemonade.changedetection.io/start?src=github)
|
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring" title="Self-hosted web page change monitoring" />](https://changedetection.io?src=github)
|
||||||
|
|
||||||
[![Release Version][release-shield]][release-link] [![Docker Pulls][docker-pulls]][docker-link] [![License][license-shield]](LICENSE.md)
|
[![Release Version][release-shield]][release-link] [![Docker Pulls][docker-pulls]][docker-link] [![License][license-shield]](LICENSE.md)
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
[**Don't have time? Let us host it for you! try our $8.99/month subscription - use our proxies and support!**](https://lemonade.changedetection.io/start) , _half the price of other website change monitoring services and comes with unlimited watches & checks!_
|
[**Don't have time? Let us host it for you! try our $8.99/month subscription - use our proxies and support!**](https://changedetection.io) , _half the price of other website change monitoring services!_
|
||||||
|
|
||||||
- Chrome browser included.
|
- Chrome browser included.
|
||||||
- Super fast, no registration needed setup.
|
- Super fast, no registration needed setup.
|
||||||
@@ -22,11 +22,11 @@ _Live your data-life pro-actively._
|
|||||||
|
|
||||||
Available when connected to a <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Playwright-content-fetcher">playwright content fetcher</a> (included as part of our subscription service)
|
Available when connected to a <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Playwright-content-fetcher">playwright content fetcher</a> (included as part of our subscription service)
|
||||||
|
|
||||||
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/visualselector-anim.gif" style="max-width:100%;" alt="Self-hosted web page change monitoring context difference " title="Self-hosted web page change monitoring context difference " />](https://lemonade.changedetection.io/start?src=github)
|
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/visualselector-anim.gif" style="max-width:100%;" alt="Self-hosted web page change monitoring context difference " title="Self-hosted web page change monitoring context difference " />](https://changedetection.io?src=github)
|
||||||
|
|
||||||
### Easily see what changed, examine by word, line, or individual character.
|
### Easily see what changed, examine by word, line, or individual character.
|
||||||
|
|
||||||
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot-diff.png" style="max-width:100%;" alt="Self-hosted web page change monitoring context difference " title="Self-hosted web page change monitoring context difference " />](https://lemonade.changedetection.io/start?src=github)
|
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot-diff.png" style="max-width:100%;" alt="Self-hosted web page change monitoring context difference " title="Self-hosted web page change monitoring context difference " />](https://changedetection.io?src=github)
|
||||||
|
|
||||||
|
|
||||||
### Perform interactive browser steps
|
### Perform interactive browser steps
|
||||||
@@ -35,7 +35,7 @@ Fill in text boxes, click buttons and more, setup your changedetection scenario.
|
|||||||
|
|
||||||
Using the **Browser Steps** configuration, add basic steps before performing change detection, such as logging into websites, adding a product to a cart, accept cookie logins, entering dates and refining searches.
|
Using the **Browser Steps** configuration, add basic steps before performing change detection, such as logging into websites, adding a product to a cart, accept cookie logins, entering dates and refining searches.
|
||||||
|
|
||||||
[<img src="docs/browsersteps-anim.gif" style="max-width:100%;" alt="Self-hosted web page change monitoring context difference " title="Website change detection with interactive browser steps, login, cookies etc" />](https://lemonade.changedetection.io/start?src=github)
|
[<img src="docs/browsersteps-anim.gif" style="max-width:100%;" alt="Self-hosted web page change monitoring context difference " title="Website change detection with interactive browser steps, login, cookies etc" />](https://changedetection.io?src=github)
|
||||||
|
|
||||||
After **Browser Steps** have been run, then visit the **Visual Selector** tab to refine the content you're interested in.
|
After **Browser Steps** have been run, then visit the **Visual Selector** tab to refine the content you're interested in.
|
||||||
Requires Playwright to be enabled.
|
Requires Playwright to be enabled.
|
||||||
@@ -66,6 +66,7 @@ Requires Playwright to be enabled.
|
|||||||
- Proactively search for jobs, get notified when companies update their careers page, search job portals for keywords.
|
- Proactively search for jobs, get notified when companies update their careers page, search job portals for keywords.
|
||||||
- Get alerts when new job positions are open on Bamboo HR and other job platforms
|
- Get alerts when new job positions are open on Bamboo HR and other job platforms
|
||||||
- Website defacement monitoring
|
- Website defacement monitoring
|
||||||
|
- Pokémon Card Restock Tracker / Pokémon TCG Tracker
|
||||||
|
|
||||||
_Need an actual Chrome runner with Javascript support? We support fetching via WebDriver and Playwright!_
|
_Need an actual Chrome runner with Javascript support? We support fetching via WebDriver and Playwright!_
|
||||||
|
|
||||||
@@ -144,7 +145,7 @@ See the wiki for more information https://github.com/dgtlmoon/changedetection.io
|
|||||||
## Filters
|
## Filters
|
||||||
|
|
||||||
XPath, JSONPath, jq, and CSS support comes baked in! You can be as specific as you need, use XPath exported from various XPath element query creation tools.
|
XPath, JSONPath, jq, and CSS support comes baked in! You can be as specific as you need, use XPath exported from various XPath element query creation tools.
|
||||||
(We support LXML `re:test`, `re:math` and `re:replace`.)
|
(We support LXML `re:test`, `re:match` and `re:replace`.)
|
||||||
|
|
||||||
## Notifications
|
## Notifications
|
||||||
|
|
||||||
@@ -237,7 +238,7 @@ Supports managing the website watch list [via our API](https://changedetection.i
|
|||||||
Do you use changedetection.io to make money? does it save you time or money? Does it make your life easier? less stressful? Remember, we write this software when we should be doing actual paid work, we have to buy food and pay rent just like you.
|
Do you use changedetection.io to make money? does it save you time or money? Does it make your life easier? less stressful? Remember, we write this software when we should be doing actual paid work, we have to buy food and pay rent just like you.
|
||||||
|
|
||||||
|
|
||||||
Firstly, consider taking out a [change detection monthly subscription - unlimited checks and watches](https://lemonade.changedetection.io/start) , even if you don't use it, you still get the warm fuzzy feeling of helping out the project. (And who knows, you might just use it!)
|
Firstly, consider taking out a [change detection monthly subscription - unlimited checks and watches](https://changedetection.io?src=github) , even if you don't use it, you still get the warm fuzzy feeling of helping out the project. (And who knows, you might just use it!)
|
||||||
|
|
||||||
Or directly donate an amount PayPal [](https://www.paypal.com/donate/?hosted_button_id=7CP6HR9ZCNDYJ)
|
Or directly donate an amount PayPal [](https://www.paypal.com/donate/?hosted_button_id=7CP6HR9ZCNDYJ)
|
||||||
|
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ from flask_paginate import Pagination, get_page_parameter
|
|||||||
from changedetectionio import html_tools
|
from changedetectionio import html_tools
|
||||||
from changedetectionio.api import api_v1
|
from changedetectionio.api import api_v1
|
||||||
|
|
||||||
__version__ = '0.41.1'
|
__version__ = '0.42.2'
|
||||||
|
|
||||||
datastore = None
|
datastore = None
|
||||||
|
|
||||||
@@ -124,6 +124,15 @@ def _jinja2_filter_datetimestamp(timestamp, format="%Y-%m-%d %H:%M:%S"):
|
|||||||
|
|
||||||
return timeago.format(timestamp, time.time())
|
return timeago.format(timestamp, time.time())
|
||||||
|
|
||||||
|
|
||||||
|
@app.template_filter('pagination_slice')
def _jinja2_filter_pagination_slice(arr, skip):
    """Return the page of ``arr`` that starts at offset ``skip``.

    The page length is read from the ``pager_size`` application setting and
    falls back to 50 when the setting is absent. A falsy page size disables
    slicing, in which case ``arr`` is returned unchanged.

    :param arr: Sliceable sequence to paginate.
    :param skip: Index of the first item of the requested page.
    :return: ``arr[skip:skip + pager_size]``, or ``arr`` itself when the page
        size is falsy.
    """
    page_size = datastore.data['settings']['application'].get('pager_size', 50)

    # Guard clause: nothing to slice by, hand back the whole sequence
    if not page_size:
        return arr

    return arr[skip:skip + page_size]
|
||||||
|
|
||||||
@app.template_filter('format_seconds_ago')
|
@app.template_filter('format_seconds_ago')
|
||||||
def _jinja2_filter_seconds_precise(timestamp):
|
def _jinja2_filter_seconds_precise(timestamp):
|
||||||
if timestamp == False:
|
if timestamp == False:
|
||||||
@@ -403,27 +412,40 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
|
|
||||||
# Sort by last_changed and add the uuid which is usually the key..
|
# Sort by last_changed and add the uuid which is usually the key..
|
||||||
sorted_watches = []
|
sorted_watches = []
|
||||||
|
search_q = request.args.get('q').strip().lower() if request.args.get('q') else False
|
||||||
for uuid, watch in datastore.data['watching'].items():
|
for uuid, watch in datastore.data['watching'].items():
|
||||||
|
|
||||||
if limit_tag != None:
|
if limit_tag:
|
||||||
# Support for comma separated list of tags.
|
# Support for comma separated list of tags.
|
||||||
if watch['tag'] is None:
|
if not watch.get('tag'):
|
||||||
continue
|
continue
|
||||||
for tag_in_watch in watch['tag'].split(','):
|
for tag_in_watch in watch.get('tag', '').split(','):
|
||||||
tag_in_watch = tag_in_watch.strip()
|
tag_in_watch = tag_in_watch.strip()
|
||||||
if tag_in_watch == limit_tag:
|
if tag_in_watch == limit_tag:
|
||||||
watch['uuid'] = uuid
|
watch['uuid'] = uuid
|
||||||
sorted_watches.append(watch)
|
if search_q:
|
||||||
|
if (watch.get('title') and search_q in watch.get('title').lower()) or search_q in watch.get('url', '').lower():
|
||||||
|
sorted_watches.append(watch)
|
||||||
|
else:
|
||||||
|
sorted_watches.append(watch)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
watch['uuid'] = uuid
|
#watch['uuid'] = uuid
|
||||||
sorted_watches.append(watch)
|
if search_q:
|
||||||
|
if (watch.get('title') and search_q in watch.get('title').lower()) or search_q in watch.get('url', '').lower():
|
||||||
|
sorted_watches.append(watch)
|
||||||
|
else:
|
||||||
|
sorted_watches.append(watch)
|
||||||
|
|
||||||
existing_tags = datastore.get_all_tags()
|
existing_tags = datastore.get_all_tags()
|
||||||
form = forms.quickWatchForm(request.form)
|
form = forms.quickWatchForm(request.form)
|
||||||
page = request.args.get(get_page_parameter(), type=int, default=1)
|
page = request.args.get(get_page_parameter(), type=int, default=1)
|
||||||
total_count = len(sorted_watches) if sorted_watches else len(datastore.data['watching'])
|
total_count = len(sorted_watches)
|
||||||
pagination = Pagination(page=page, total=total_count, per_page=int(os.getenv('pagination_per_page', 50)), css_framework = "semantic")
|
|
||||||
|
pagination = Pagination(page=page,
|
||||||
|
total=total_count,
|
||||||
|
per_page=datastore.data['settings']['application'].get('pager_size', 50), css_framework="semantic")
|
||||||
|
|
||||||
|
|
||||||
output = render_template(
|
output = render_template(
|
||||||
"watch-overview.html",
|
"watch-overview.html",
|
||||||
@@ -437,6 +459,7 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
|
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
|
||||||
pagination=pagination,
|
pagination=pagination,
|
||||||
queued_uuids=[q_uuid.item['uuid'] for q_uuid in update_q.queue],
|
queued_uuids=[q_uuid.item['uuid'] for q_uuid in update_q.queue],
|
||||||
|
search_q=request.args.get('q','').strip(),
|
||||||
sort_attribute=request.args.get('sort') if request.args.get('sort') else request.cookies.get('sort'),
|
sort_attribute=request.args.get('sort') if request.args.get('sort') else request.cookies.get('sort'),
|
||||||
sort_order=request.args.get('order') if request.args.get('order') else request.cookies.get('order'),
|
sort_order=request.args.get('order') if request.args.get('order') else request.cookies.get('order'),
|
||||||
system_default_fetcher=datastore.data['settings']['application'].get('fetch_backend'),
|
system_default_fetcher=datastore.data['settings']['application'].get('fetch_backend'),
|
||||||
@@ -690,6 +713,7 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
form=form,
|
form=form,
|
||||||
has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
|
has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
|
||||||
has_empty_checktime=using_default_check_time,
|
has_empty_checktime=using_default_check_time,
|
||||||
|
has_extra_headers_file=watch.has_extra_headers_file or datastore.has_extra_headers_file,
|
||||||
is_html_webdriver=is_html_webdriver,
|
is_html_webdriver=is_html_webdriver,
|
||||||
jq_support=jq_support,
|
jq_support=jq_support,
|
||||||
playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False),
|
playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False),
|
||||||
@@ -1313,6 +1337,13 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
|
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
|
||||||
flash("{} watches queued for rechecking".format(len(uuids)))
|
flash("{} watches queued for rechecking".format(len(uuids)))
|
||||||
|
|
||||||
|
elif (op == 'clear-history'):
|
||||||
|
for uuid in uuids:
|
||||||
|
uuid = uuid.strip()
|
||||||
|
if datastore.data['watching'].get(uuid):
|
||||||
|
datastore.clear_watch_history(uuid)
|
||||||
|
flash("{} watches cleared/reset.".format(len(uuids)))
|
||||||
|
|
||||||
elif (op == 'notification-default'):
|
elif (op == 'notification-default'):
|
||||||
from changedetectionio.notification import (
|
from changedetectionio.notification import (
|
||||||
default_notification_format_for_watch
|
default_notification_format_for_watch
|
||||||
@@ -1427,6 +1458,7 @@ def check_for_new_version():
|
|||||||
# Check daily
|
# Check daily
|
||||||
app.config.exit.wait(86400)
|
app.config.exit.wait(86400)
|
||||||
|
|
||||||
|
|
||||||
def notification_runner():
|
def notification_runner():
|
||||||
global notification_debug_log
|
global notification_debug_log
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|||||||
@@ -27,58 +27,106 @@ import os
|
|||||||
import logging
|
import logging
|
||||||
from changedetectionio.store import ChangeDetectionStore
|
from changedetectionio.store import ChangeDetectionStore
|
||||||
from changedetectionio import login_optionally_required
|
from changedetectionio import login_optionally_required
|
||||||
browsersteps_live_ui_o = {}
|
|
||||||
browsersteps_playwright_browser_interface = None
|
|
||||||
browsersteps_playwright_browser_interface_browser = None
|
|
||||||
browsersteps_playwright_browser_interface_context = None
|
|
||||||
browsersteps_playwright_browser_interface_end_time = None
|
|
||||||
browsersteps_playwright_browser_interface_start_time = None
|
|
||||||
|
|
||||||
def cleanup_playwright_session():
|
browsersteps_sessions = {}
|
||||||
|
io_interface_context = None
|
||||||
|
|
||||||
global browsersteps_live_ui_o
|
|
||||||
global browsersteps_playwright_browser_interface
|
|
||||||
global browsersteps_playwright_browser_interface_browser
|
|
||||||
global browsersteps_playwright_browser_interface_context
|
|
||||||
global browsersteps_playwright_browser_interface_end_time
|
|
||||||
global browsersteps_playwright_browser_interface_start_time
|
|
||||||
|
|
||||||
browsersteps_live_ui_o = {}
|
|
||||||
browsersteps_playwright_browser_interface = None
|
|
||||||
browsersteps_playwright_browser_interface_browser = None
|
|
||||||
browsersteps_playwright_browser_interface_end_time = None
|
|
||||||
browsersteps_playwright_browser_interface_start_time = None
|
|
||||||
|
|
||||||
print("Cleaning up old playwright session because time was up, calling .goodbye()")
|
|
||||||
try:
|
|
||||||
browsersteps_playwright_browser_interface_context.goodbye()
|
|
||||||
except Exception as e:
|
|
||||||
print ("Got exception in shutdown, probably OK")
|
|
||||||
print (str(e))
|
|
||||||
|
|
||||||
browsersteps_playwright_browser_interface_context = None
|
|
||||||
|
|
||||||
print ("Cleaning up old playwright session because time was up - done")
|
|
||||||
|
|
||||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||||
|
|
||||||
browser_steps_blueprint = Blueprint('browser_steps', __name__, template_folder="templates")
|
browser_steps_blueprint = Blueprint('browser_steps', __name__, template_folder="templates")
|
||||||
|
|
||||||
|
def start_browsersteps_session(watch_uuid):
    """Open a Playwright/CDP browser session for the Browser Steps UI.

    Starts the shared Playwright driver on first use, connects to the browser
    named by the ``PLAYWRIGHT_DRIVER_URL`` environment variable and wraps the
    connection in a ``browsersteps_live_ui`` stepper, passing the watch's
    preferred proxy when one is configured.

    :param watch_uuid: UUID of the watch the session is opened for; used to
        look up the preferred proxy.
    :return: On success, a dict with the keys ``start_time``, ``browser`` and
        ``browserstepper``. When the browser connection fails, a 401 response
        built with ``make_response`` is returned instead, so callers must
        check the type of the result before treating it as a session.
    """
    from . import nonContext
    from . import browser_steps
    import time
    from urllib.parse import urlparse
    global io_interface_context

    # We keep the playwright session open for many minutes
    seconds_keepalive = int(os.getenv('BROWSERSTEPS_MINUTES_KEEPALIVE', 10)) * 60

    session = {'start_time': time.time()}

    # You can only have one of these running
    # This should be very fine to leave running for the life of the application
    # @idea - Make it global so the pool of watch fetchers can use it also
    if not io_interface_context:
        # Start the Playwright context, which is actually a nodejs sub-process and communicates over STDIN/STDOUT pipes.
        # Assign only after .start() succeeded, so a failed start is retried on the next call
        # instead of leaving an un-started object behind in the global.
        io_interface_context = nonContext.c_sync_playwright().start()

    # Keep it alive for 3 seconds more than we advertise, sometimes it helps to keep it shutting down cleanly
    keepalive_ms = (seconds_keepalive + 3) * 1000
    driver_url = os.getenv('PLAYWRIGHT_DRIVER_URL', '')
    # Open the query string with '?' when the URL has none yet, otherwise extend it with '&'
    separator = '&' if '?' in driver_url else '?'
    connect_url = "{}{}timeout={}".format(driver_url, separator, keepalive_ms)

    try:
        session['browser'] = io_interface_context.chromium.connect_over_cdp(connect_url)
    except Exception as e:
        # Any connection problem is reported to the caller as a 401 response
        if 'ECONNREFUSED' in str(e):
            return make_response('Unable to start the Playwright Browser session, is it running?', 401)
        return make_response(str(e), 401)

    proxy = None
    proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid)
    if proxy_id:
        # The preferred proxy ID may be missing from the proxy list; treat that as "no proxy"
        proxy_url = (datastore.proxy_list.get(proxy_id) or {}).get('url')
        if proxy_url:
            # Playwright needs separate username and password values
            parsed = urlparse(proxy_url)
            proxy = {'server': proxy_url}

            if parsed.username:
                proxy['username'] = parsed.username

            if parsed.password:
                proxy['password'] = parsed.password

            print("Browser Steps: UUID {} selected proxy {}".format(watch_uuid, proxy_url))

    # Tell Playwright to connect to Chrome and setup a new session via our stepper interface
    session['browserstepper'] = browser_steps.browsersteps_live_ui(
        playwright_browser=session['browser'],
        proxy=proxy)

    return session
|
||||||
|
|
||||||
|
|
||||||
@login_optionally_required
|
@login_optionally_required
|
||||||
@browser_steps_blueprint.route("/browsersteps_update", methods=['GET', 'POST'])
|
@browser_steps_blueprint.route("/browsersteps_start_session", methods=['GET'])
|
||||||
|
def browsersteps_start_session():
    """Start a new Browser Steps session and return its session ID.

    The watch to open the session for is read from the ``uuid`` query
    argument. The new session is stored in ``browsersteps_sessions`` under a
    freshly generated UUID4 string.

    :return: ``{'browsersteps_session_id': <id>}`` on success; a 500 response
        when no ``uuid`` argument was supplied; or the error response produced
        by ``start_browsersteps_session`` when the browser could not be
        reached.
    """
    import uuid
    global browsersteps_sessions

    watch_uuid = request.args.get('uuid')
    if not watch_uuid:
        return make_response('No Watch UUID specified', 500)

    print("Starting connection with playwright")
    logging.debug("browser_steps.py connecting")
    session = start_browsersteps_session(watch_uuid)

    # start_browsersteps_session() hands back an error response instead of a
    # session dict when the browser connection fails; pass that on to the
    # client rather than registering it as a usable session.
    if not isinstance(session, dict):
        return session

    browsersteps_session_id = str(uuid.uuid4())
    browsersteps_sessions[browsersteps_session_id] = session
    print("Starting connection with playwright - done")
    return {'browsersteps_session_id': browsersteps_session_id}
|
||||||
|
|
||||||
|
# A request for an action was received
|
||||||
|
@login_optionally_required
|
||||||
|
@browser_steps_blueprint.route("/browsersteps_update", methods=['POST'])
|
||||||
def browsersteps_ui_update():
|
def browsersteps_ui_update():
|
||||||
import base64
|
import base64
|
||||||
import playwright._impl._api_types
|
import playwright._impl._api_types
|
||||||
import time
|
global browsersteps_sessions
|
||||||
|
|
||||||
from changedetectionio.blueprint.browser_steps import browser_steps
|
from changedetectionio.blueprint.browser_steps import browser_steps
|
||||||
|
|
||||||
global browsersteps_live_ui_o, browsersteps_playwright_browser_interface_end_time
|
|
||||||
global browsersteps_playwright_browser_interface_browser
|
|
||||||
global browsersteps_playwright_browser_interface
|
|
||||||
global browsersteps_playwright_browser_interface_start_time
|
|
||||||
|
|
||||||
step_n = None
|
|
||||||
remaining =0
|
remaining =0
|
||||||
uuid = request.args.get('uuid')
|
uuid = request.args.get('uuid')
|
||||||
|
|
||||||
@@ -87,13 +135,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
if not browsersteps_session_id:
|
if not browsersteps_session_id:
|
||||||
return make_response('No browsersteps_session_id specified', 500)
|
return make_response('No browsersteps_session_id specified', 500)
|
||||||
|
|
||||||
# Because we don't "really" run in a context manager ( we make the playwright interface global/long-living )
|
if not browsersteps_sessions.get(browsersteps_session_id):
|
||||||
# We need to manage the shutdown when the time is up
|
return make_response('No session exists under that ID', 500)
|
||||||
if browsersteps_playwright_browser_interface_end_time:
|
|
||||||
remaining = browsersteps_playwright_browser_interface_end_time-time.time()
|
|
||||||
if browsersteps_playwright_browser_interface_end_time and remaining <= 0:
|
|
||||||
cleanup_playwright_session()
|
|
||||||
return make_response('Browser session expired, please reload the Browser Steps interface', 401)
|
|
||||||
|
|
||||||
# Actions - step/apply/etc, do the thing and return state
|
# Actions - step/apply/etc, do the thing and return state
|
||||||
if request.method == 'POST':
|
if request.method == 'POST':
|
||||||
@@ -112,12 +156,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
# @todo try.. accept.. nice errors not popups..
|
# @todo try.. accept.. nice errors not popups..
|
||||||
try:
|
try:
|
||||||
|
|
||||||
this_session = browsersteps_live_ui_o.get(browsersteps_session_id)
|
browsersteps_sessions[browsersteps_session_id]['browserstepper'].call_action(action_name=step_operation,
|
||||||
if not this_session:
|
|
||||||
print("Browser exited")
|
|
||||||
return make_response('Browser session ran out of time :( Please reload this page.', 401)
|
|
||||||
|
|
||||||
this_session.call_action(action_name=step_operation,
|
|
||||||
selector=step_selector,
|
selector=step_selector,
|
||||||
optional_value=step_optional_value)
|
optional_value=step_optional_value)
|
||||||
|
|
||||||
@@ -129,99 +168,43 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
# Get visual selector ready/update its data (also use the current filter info from the page?)
|
# Get visual selector ready/update its data (also use the current filter info from the page?)
|
||||||
# When the last 'apply' button was pressed
|
# When the last 'apply' button was pressed
|
||||||
# @todo this adds overhead because the xpath selection is happening twice
|
# @todo this adds overhead because the xpath selection is happening twice
|
||||||
u = this_session.page.url
|
u = browsersteps_sessions[browsersteps_session_id]['browserstepper'].page.url
|
||||||
if is_last_step and u:
|
if is_last_step and u:
|
||||||
(screenshot, xpath_data) = this_session.request_visualselector_data()
|
(screenshot, xpath_data) = browsersteps_sessions[browsersteps_session_id]['browserstepper'].request_visualselector_data()
|
||||||
datastore.save_screenshot(watch_uuid=uuid, screenshot=screenshot)
|
datastore.save_screenshot(watch_uuid=uuid, screenshot=screenshot)
|
||||||
datastore.save_xpath_data(watch_uuid=uuid, data=xpath_data)
|
datastore.save_xpath_data(watch_uuid=uuid, data=xpath_data)
|
||||||
|
|
||||||
# Setup interface
|
# if not this_session.page:
|
||||||
if request.method == 'GET':
|
# cleanup_playwright_session()
|
||||||
|
# return make_response('Browser session ran out of time :( Please reload this page.', 401)
|
||||||
|
|
||||||
if not browsersteps_playwright_browser_interface:
|
# Screenshots and other info only needed on requesting a step (POST)
|
||||||
print("Starting connection with playwright")
|
try:
|
||||||
logging.debug("browser_steps.py connecting")
|
state = browsersteps_sessions[browsersteps_session_id]['browserstepper'].get_current_state()
|
||||||
|
except playwright._impl._api_types.Error as e:
|
||||||
|
return make_response("Browser session ran out of time :( Please reload this page."+str(e), 401)
|
||||||
|
|
||||||
global browsersteps_playwright_browser_interface_context
|
# Use send_file() which is way faster than read/write loop on bytes
|
||||||
from . import nonContext
|
import json
|
||||||
browsersteps_playwright_browser_interface_context = nonContext.c_sync_playwright()
|
from tempfile import mkstemp
|
||||||
browsersteps_playwright_browser_interface = browsersteps_playwright_browser_interface_context.start()
|
from flask import send_file
|
||||||
|
tmp_fd, tmp_file = mkstemp(text=True, suffix=".json", prefix="changedetectionio-")
|
||||||
|
|
||||||
time.sleep(1)
|
output = json.dumps({'screenshot': "data:image/jpeg;base64,{}".format(
|
||||||
# At 20 minutes, some other variable is closing it
|
base64.b64encode(state[0]).decode('ascii')),
|
||||||
# @todo find out what it is and set it
|
'xpath_data': state[1],
|
||||||
seconds_keepalive = int(os.getenv('BROWSERSTEPS_MINUTES_KEEPALIVE', 10)) * 60
|
'session_age_start': browsersteps_sessions[browsersteps_session_id]['browserstepper'].age_start,
|
||||||
|
'browser_time_remaining': round(remaining)
|
||||||
|
})
|
||||||
|
|
||||||
# keep it alive for 10 seconds more than we advertise, sometimes it helps to keep it shutting down cleanly
|
with os.fdopen(tmp_fd, 'w') as f:
|
||||||
keepalive = "&timeout={}".format(((seconds_keepalive+3) * 1000))
|
f.write(output)
|
||||||
try:
|
|
||||||
browsersteps_playwright_browser_interface_browser = browsersteps_playwright_browser_interface.chromium.connect_over_cdp(
|
|
||||||
os.getenv('PLAYWRIGHT_DRIVER_URL', '') + keepalive)
|
|
||||||
except Exception as e:
|
|
||||||
if 'ECONNREFUSED' in str(e):
|
|
||||||
return make_response('Unable to start the Playwright session properly, is it running?', 401)
|
|
||||||
|
|
||||||
browsersteps_playwright_browser_interface_end_time = time.time() + (seconds_keepalive-3)
|
response = make_response(send_file(path_or_file=tmp_file,
|
||||||
print("Starting connection with playwright - done")
|
mimetype='application/json; charset=UTF-8',
|
||||||
|
etag=True))
|
||||||
if not browsersteps_live_ui_o.get(browsersteps_session_id):
|
# No longer needed
|
||||||
# Boot up a new session
|
os.unlink(tmp_file)
|
||||||
proxy_id = datastore.get_preferred_proxy_for_watch(uuid=uuid)
|
|
||||||
proxy = None
|
|
||||||
if proxy_id:
|
|
||||||
proxy_url = datastore.proxy_list.get(proxy_id).get('url')
|
|
||||||
if proxy_url:
|
|
||||||
proxy = {'server': proxy_url}
|
|
||||||
print("Browser Steps: UUID {} Using proxy {}".format(uuid, proxy_url))
|
|
||||||
|
|
||||||
# Begin the new "Playwright Context" that re-uses the playwright interface
|
|
||||||
# Each session is a "Playwright Context" as a list, that uses the playwright interface
|
|
||||||
browsersteps_live_ui_o[browsersteps_session_id] = browser_steps.browsersteps_live_ui(
|
|
||||||
playwright_browser=browsersteps_playwright_browser_interface_browser,
|
|
||||||
proxy=proxy)
|
|
||||||
this_session = browsersteps_live_ui_o[browsersteps_session_id]
|
|
||||||
|
|
||||||
if not this_session.page:
|
|
||||||
cleanup_playwright_session()
|
|
||||||
return make_response('Browser session ran out of time :( Please reload this page.', 401)
|
|
||||||
|
|
||||||
response = None
|
|
||||||
|
|
||||||
if request.method == 'POST':
|
|
||||||
# Screenshots and other info only needed on requesting a step (POST)
|
|
||||||
try:
|
|
||||||
state = this_session.get_current_state()
|
|
||||||
except playwright._impl._api_types.Error as e:
|
|
||||||
return make_response("Browser session ran out of time :( Please reload this page."+str(e), 401)
|
|
||||||
|
|
||||||
# Use send_file() which is way faster than read/write loop on bytes
|
|
||||||
import json
|
|
||||||
from tempfile import mkstemp
|
|
||||||
from flask import send_file
|
|
||||||
tmp_fd, tmp_file = mkstemp(text=True, suffix=".json", prefix="changedetectionio-")
|
|
||||||
|
|
||||||
output = json.dumps({'screenshot': "data:image/jpeg;base64,{}".format(
|
|
||||||
base64.b64encode(state[0]).decode('ascii')),
|
|
||||||
'xpath_data': state[1],
|
|
||||||
'session_age_start': this_session.age_start,
|
|
||||||
'browser_time_remaining': round(remaining)
|
|
||||||
})
|
|
||||||
|
|
||||||
with os.fdopen(tmp_fd, 'w') as f:
|
|
||||||
f.write(output)
|
|
||||||
|
|
||||||
response = make_response(send_file(path_or_file=tmp_file,
|
|
||||||
mimetype='application/json; charset=UTF-8',
|
|
||||||
etag=True))
|
|
||||||
# No longer needed
|
|
||||||
os.unlink(tmp_file)
|
|
||||||
|
|
||||||
elif request.method == 'GET':
|
|
||||||
# Just enough to get the session rolling, it will call for goto-site via POST next
|
|
||||||
response = make_response({
|
|
||||||
'session_age_start': this_session.age_start,
|
|
||||||
'browser_time_remaining': round(remaining)
|
|
||||||
})
|
|
||||||
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|||||||
@@ -71,10 +71,10 @@ class steppable_browser_interface():
|
|||||||
optional_value = str(jinja2_env.from_string(optional_value).render())
|
optional_value = str(jinja2_env.from_string(optional_value).render())
|
||||||
|
|
||||||
action_handler(selector, optional_value)
|
action_handler(selector, optional_value)
|
||||||
self.page.wait_for_timeout(3 * 1000)
|
self.page.wait_for_timeout(1.5 * 1000)
|
||||||
print("Call action done in", time.time() - now)
|
print("Call action done in", time.time() - now)
|
||||||
|
|
||||||
def action_goto_url(self, selector, value):
|
def action_goto_url(self, selector=None, value=None):
|
||||||
# self.page.set_viewport_size({"width": 1280, "height": 5000})
|
# self.page.set_viewport_size({"width": 1280, "height": 5000})
|
||||||
now = time.time()
|
now = time.time()
|
||||||
response = self.page.goto(value, timeout=0, wait_until='commit')
|
response = self.page.goto(value, timeout=0, wait_until='commit')
|
||||||
@@ -105,7 +105,8 @@ class steppable_browser_interface():
|
|||||||
print("Clicking element")
|
print("Clicking element")
|
||||||
if not len(selector.strip()):
|
if not len(selector.strip()):
|
||||||
return
|
return
|
||||||
self.page.click(selector, timeout=10 * 1000, delay=randint(200, 500))
|
|
||||||
|
self.page.click(selector=selector, timeout=30 * 1000, delay=randint(200, 500))
|
||||||
|
|
||||||
def action_click_element_if_exists(self, selector, value):
|
def action_click_element_if_exists(self, selector, value):
|
||||||
import playwright._impl._api_types as _api_types
|
import playwright._impl._api_types as _api_types
|
||||||
@@ -132,18 +133,18 @@ class steppable_browser_interface():
|
|||||||
self.page.wait_for_timeout(1000)
|
self.page.wait_for_timeout(1000)
|
||||||
|
|
||||||
def action_wait_for_seconds(self, selector, value):
|
def action_wait_for_seconds(self, selector, value):
|
||||||
self.page.wait_for_timeout(int(value) * 1000)
|
self.page.wait_for_timeout(float(value.strip()) * 1000)
|
||||||
|
|
||||||
def action_wait_for_text(self, selector, value):
|
def action_wait_for_text(self, selector, value):
|
||||||
import json
|
import json
|
||||||
v = json.dumps(value)
|
v = json.dumps(value)
|
||||||
self.page.wait_for_function(f'document.querySelector("body").innerText.includes({v});', timeout=30000)
|
self.page.wait_for_function(f'document.querySelector("body").innerText.includes({v});', timeout=90000)
|
||||||
|
|
||||||
def action_wait_for_text_in_element(self, selector, value):
|
def action_wait_for_text_in_element(self, selector, value):
|
||||||
import json
|
import json
|
||||||
s = json.dumps(selector)
|
s = json.dumps(selector)
|
||||||
v = json.dumps(value)
|
v = json.dumps(value)
|
||||||
self.page.wait_for_function(f'document.querySelector({s}).innerText.includes({v});', timeout=30000)
|
self.page.wait_for_function(f'document.querySelector({s}).innerText.includes({v});', timeout=90000)
|
||||||
|
|
||||||
# @todo - in the future make some popout interface to capture what needs to be set
|
# @todo - in the future make some popout interface to capture what needs to be set
|
||||||
# https://playwright.dev/python/docs/api/class-keyboard
|
# https://playwright.dev/python/docs/api/class-keyboard
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ import time
|
|||||||
|
|
||||||
visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4, header, footer, section, article, aside, details, main, nav, section, summary'
|
visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4, header, footer, section, article, aside, details, main, nav, section, summary'
|
||||||
|
|
||||||
|
|
||||||
class Non200ErrorCodeReceived(Exception):
|
class Non200ErrorCodeReceived(Exception):
|
||||||
def __init__(self, status_code, url, screenshot=None, xpath_data=None, page_html=None):
|
def __init__(self, status_code, url, screenshot=None, xpath_data=None, page_html=None):
|
||||||
# Set this so we can use it in other parts of the app
|
# Set this so we can use it in other parts of the app
|
||||||
@@ -24,10 +25,12 @@ class Non200ErrorCodeReceived(Exception):
|
|||||||
self.page_text = html_tools.html_to_text(page_html)
|
self.page_text = html_tools.html_to_text(page_html)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
class checksumFromPreviousCheckWasTheSame(Exception):
|
class checksumFromPreviousCheckWasTheSame(Exception):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
class JSActionExceptions(Exception):
|
class JSActionExceptions(Exception):
|
||||||
def __init__(self, status_code, url, screenshot, message=''):
|
def __init__(self, status_code, url, screenshot, message=''):
|
||||||
self.status_code = status_code
|
self.status_code = status_code
|
||||||
@@ -36,6 +39,7 @@ class JSActionExceptions(Exception):
|
|||||||
self.message = message
|
self.message = message
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
class BrowserStepsStepTimout(Exception):
|
class BrowserStepsStepTimout(Exception):
|
||||||
def __init__(self, step_n):
|
def __init__(self, step_n):
|
||||||
self.step_n = step_n
|
self.step_n = step_n
|
||||||
@@ -51,6 +55,7 @@ class PageUnloadable(Exception):
|
|||||||
self.message = message
|
self.message = message
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
class EmptyReply(Exception):
|
class EmptyReply(Exception):
|
||||||
def __init__(self, status_code, url, screenshot=None):
|
def __init__(self, status_code, url, screenshot=None):
|
||||||
# Set this so we can use it in other parts of the app
|
# Set this so we can use it in other parts of the app
|
||||||
@@ -59,6 +64,7 @@ class EmptyReply(Exception):
|
|||||||
self.screenshot = screenshot
|
self.screenshot = screenshot
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
class ScreenshotUnavailable(Exception):
|
class ScreenshotUnavailable(Exception):
|
||||||
def __init__(self, status_code, url, page_html=None):
|
def __init__(self, status_code, url, page_html=None):
|
||||||
# Set this so we can use it in other parts of the app
|
# Set this so we can use it in other parts of the app
|
||||||
@@ -69,6 +75,7 @@ class ScreenshotUnavailable(Exception):
|
|||||||
self.page_text = html_to_text(page_html)
|
self.page_text = html_to_text(page_html)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
class ReplyWithContentButNoText(Exception):
|
class ReplyWithContentButNoText(Exception):
|
||||||
def __init__(self, status_code, url, screenshot=None):
|
def __init__(self, status_code, url, screenshot=None):
|
||||||
# Set this so we can use it in other parts of the app
|
# Set this so we can use it in other parts of the app
|
||||||
@@ -77,13 +84,14 @@ class ReplyWithContentButNoText(Exception):
|
|||||||
self.screenshot = screenshot
|
self.screenshot = screenshot
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
class Fetcher():
|
class Fetcher():
|
||||||
browser_steps = None
|
browser_steps = None
|
||||||
browser_steps_screenshot_path = None
|
browser_steps_screenshot_path = None
|
||||||
content = None
|
content = None
|
||||||
error = None
|
error = None
|
||||||
fetcher_description = "No description"
|
fetcher_description = "No description"
|
||||||
headers = None
|
headers = {}
|
||||||
status_code = None
|
status_code = None
|
||||||
webdriver_js_execute_code = None
|
webdriver_js_execute_code = None
|
||||||
xpath_data = None
|
xpath_data = None
|
||||||
@@ -105,7 +113,6 @@ class Fetcher():
|
|||||||
self.xpath_element_js = resource_string(__name__, "res/xpath_element_scraper.js").decode('utf-8')
|
self.xpath_element_js = resource_string(__name__, "res/xpath_element_scraper.js").decode('utf-8')
|
||||||
self.instock_data_js = resource_string(__name__, "res/stock-not-in-stock.js").decode('utf-8')
|
self.instock_data_js = resource_string(__name__, "res/stock-not-in-stock.js").decode('utf-8')
|
||||||
|
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def get_error(self):
|
def get_error(self):
|
||||||
return self.error
|
return self.error
|
||||||
@@ -140,6 +147,13 @@ class Fetcher():
|
|||||||
def is_ready(self):
|
def is_ready(self):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
def get_all_headers(self):
    """
    Get all response headers with every key lowercased.

    Header values are returned unchanged. `self.headers` may be None (for
    example when a fetch backend reports no headers), in which case an
    empty dict is returned instead of raising.

    :return: dict mapping lowercased header name to its value
    """
    # Guard against None so callers can always treat the result as a dict
    return {k.lower(): v for k, v in (self.headers or {}).items()}
|
||||||
|
|
||||||
def iterate_browser_steps(self):
|
def iterate_browser_steps(self):
|
||||||
from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface
|
from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface
|
||||||
from playwright._impl._api_types import TimeoutError
|
from playwright._impl._api_types import TimeoutError
|
||||||
@@ -152,13 +166,15 @@ class Fetcher():
|
|||||||
interface = steppable_browser_interface()
|
interface = steppable_browser_interface()
|
||||||
interface.page = self.page
|
interface.page = self.page
|
||||||
|
|
||||||
valid_steps = filter(lambda s: (s['operation'] and len(s['operation']) and s['operation'] != 'Choose one' and s['operation'] != 'Goto site'), self.browser_steps)
|
valid_steps = filter(
|
||||||
|
lambda s: (s['operation'] and len(s['operation']) and s['operation'] != 'Choose one' and s['operation'] != 'Goto site'),
|
||||||
|
self.browser_steps)
|
||||||
|
|
||||||
for step in valid_steps:
|
for step in valid_steps:
|
||||||
step_n += 1
|
step_n += 1
|
||||||
print(">> Iterating check - browser Step n {} - {}...".format(step_n, step['operation']))
|
print(">> Iterating check - browser Step n {} - {}...".format(step_n, step['operation']))
|
||||||
self.screenshot_step("before-"+str(step_n))
|
self.screenshot_step("before-" + str(step_n))
|
||||||
self.save_step_html("before-"+str(step_n))
|
self.save_step_html("before-" + str(step_n))
|
||||||
try:
|
try:
|
||||||
optional_value = step['optional_value']
|
optional_value = step['optional_value']
|
||||||
selector = step['selector']
|
selector = step['selector']
|
||||||
@@ -173,12 +189,11 @@ class Fetcher():
|
|||||||
optional_value=optional_value)
|
optional_value=optional_value)
|
||||||
self.screenshot_step(step_n)
|
self.screenshot_step(step_n)
|
||||||
self.save_step_html(step_n)
|
self.save_step_html(step_n)
|
||||||
except TimeoutError:
|
except TimeoutError as e:
|
||||||
|
print(str(e))
|
||||||
# Stop processing here
|
# Stop processing here
|
||||||
raise BrowserStepsStepTimout(step_n=step_n)
|
raise BrowserStepsStepTimout(step_n=step_n)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# It's always good to reset these
|
# It's always good to reset these
|
||||||
def delete_browser_steps_screenshots(self):
|
def delete_browser_steps_screenshots(self):
|
||||||
import glob
|
import glob
|
||||||
@@ -188,6 +203,7 @@ class Fetcher():
|
|||||||
for f in files:
|
for f in files:
|
||||||
os.unlink(f)
|
os.unlink(f)
|
||||||
|
|
||||||
|
|
||||||
# Maybe for the future, each fetcher provides its own diff output, could be used for text, image
|
# Maybe for the future, each fetcher provides its own diff output, could be used for text, image
|
||||||
# the current one would return javascript output (as we use JS to generate the diff)
|
# the current one would return javascript output (as we use JS to generate the diff)
|
||||||
#
|
#
|
||||||
@@ -205,6 +221,7 @@ def available_fetchers():
|
|||||||
|
|
||||||
return p
|
return p
|
||||||
|
|
||||||
|
|
||||||
class base_html_playwright(Fetcher):
|
class base_html_playwright(Fetcher):
|
||||||
fetcher_description = "Playwright {}/Javascript".format(
|
fetcher_description = "Playwright {}/Javascript".format(
|
||||||
os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize()
|
os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize()
|
||||||
@@ -268,6 +285,119 @@ class base_html_playwright(Fetcher):
|
|||||||
with open(destination, 'w') as f:
|
with open(destination, 'w') as f:
|
||||||
f.write(content)
|
f.write(content)
|
||||||
|
|
||||||
|
def run_fetch_browserless_puppeteer(self,
                                    url,
                                    timeout,
                                    request_headers,
                                    request_body,
                                    request_method,
                                    ignore_status_codes=False,
                                    current_include_filters=None,
                                    is_binary=False):
    """
    Fetch `url` by POSTing a Puppeteer script to a browserless "function" endpoint.

    The endpoint is taken from BROWSERLESS_FUNCTION_URL, or guessed from
    PLAYWRIGHT_DRIVER_URL (scheme forced to http, path set to "function").
    On success the JSON reply is unpacked into self.content, self.headers,
    self.instock_data, self.screenshot, self.status_code and self.xpath_data.

    `timeout`, `request_body`, `request_method` and `is_binary` are accepted
    but not used by this method.

    :param url: page to fetch
    :param request_headers: dict of headers, forwarded to the script; its
        'User-Agent' entry (default 'Mozilla/5.0') is also sent as `user_agent`
    :param ignore_status_codes: when true, a non-200 page status is not an error
    :param current_include_filters: forwarded to the script as `include_filters`
    :raises PageUnloadable: timeout talking to browserless, or a non-200 reply from it
    :raises ScreenshotUnavailable: reply carried no screenshot
    :raises EmptyReply: reply carried no (non-whitespace) content
    :raises Non200ErrorCodeReceived: page status was not 200 and not ignored
    """
    from pkg_resources import resource_string
    from requests.exceptions import ConnectTimeout, ReadTimeout
    from urllib.parse import quote, urlparse

    extra_wait_ms = (int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay) * 1000

    self.xpath_element_js = self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors)
    code = resource_string(__name__, "res/puppeteer_fetch.js").decode('utf-8')
    # In the future inject this as a proper JS package
    code = code.replace('%xpath_scrape_code%', self.xpath_element_js)
    code = code.replace('%instock_scrape_code%', self.instock_data_js)

    wait_browserless_seconds = 240

    browserless_function_url = os.getenv('BROWSERLESS_FUNCTION_URL')
    if not browserless_function_url:
        # Convert/try to guess from PLAYWRIGHT_DRIVER_URL
        o = urlparse(os.getenv('PLAYWRIGHT_DRIVER_URL'))
        browserless_function_url = o._replace(scheme="http")._replace(path="function").geturl()

    # Collect every extra query argument first, then join them onto the URL once,
    # so the '?' / '&' separator is correct whether or not a proxy is configured.
    query_args = []

    # Append proxy connect string
    if self.proxy:
        # Remove username/password if it exists in the URL or you will receive "ERR_NO_SUPPORTED_PROXIES" error
        # Actual authentication handled by Puppeteer/node
        o = urlparse(self.proxy.get('server'))
        proxy_host = o.hostname or ''
        # Only add ":port" when the proxy URL actually specifies one (avoids "host:None")
        proxy_netloc = proxy_host if o.port is None else "{}:{}".format(proxy_host, o.port)
        proxy_url = quote(o._replace(netloc=proxy_netloc).geturl())
        query_args.append(f"--proxy-server={proxy_url}")

    query_args.extend([
        "--disable-features=AudioServiceOutOfProcess",
        "dumpio=true",
        "--disable-remote-fonts",
    ])

    amp = '&' if '?' in browserless_function_url else '?'
    # @todo /function needs adding ws:// to http:// rebuild this
    request_url = browserless_function_url + amp + '&'.join(query_args)

    try:
        response = requests.request(
            method="POST",
            json={
                "code": code,
                "context": {
                    # Very primitive disk cache - USE WITH EXTREME CAUTION
                    # Run browserless container with -e "FUNCTION_BUILT_INS=[\"fs\",\"crypto\"]"
                    'disk_cache_dir': os.getenv("PUPPETEER_DISK_CACHE", False),  # or path to disk cache ending in /, ie /tmp/cache/
                    'execute_js': self.webdriver_js_execute_code,
                    'extra_wait_ms': extra_wait_ms,
                    'include_filters': current_include_filters,
                    'req_headers': request_headers,
                    'screenshot_quality': int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)),
                    'url': url,
                    'user_agent': request_headers.get('User-Agent', 'Mozilla/5.0'),
                    'proxy_username': self.proxy.get('username', '') if self.proxy else False,
                    'proxy_password': self.proxy.get('password', '') if self.proxy else False,
                    'no_cache_list': [
                        'twitter',
                        '.pdf'
                    ],
                    # Could use https://github.com/easylist/easylist here, or install a plugin
                    'block_url_list': [
                        'adnxs.com',
                        'analytics.twitter.com',
                        'doubleclick.net',
                        'google-analytics.com',
                        'googletagmanager',
                        'trustpilot.com'
                    ]
                }
            },
            url=request_url,
            timeout=wait_browserless_seconds)

    except ReadTimeout:
        raise PageUnloadable(url=url, status_code=None, message=f"No response from browserless in {wait_browserless_seconds}s")
    except ConnectTimeout:
        raise PageUnloadable(url=url, status_code=None, message=f"Timed out connecting to browserless, retrying..")

    # 200 here means only that the communication to browserless worked, not the page state
    if response.status_code != 200:
        # Some other error from browserless
        raise PageUnloadable(url=url, status_code=None, message=response.content.decode('utf-8'))

    import base64

    x = response.json()
    if not x.get('screenshot'):
        # https://github.com/puppeteer/puppeteer/blob/v1.0.0/docs/troubleshooting.md#tips
        # https://github.com/puppeteer/puppeteer/issues/1834
        # https://github.com/puppeteer/puppeteer/issues/1834#issuecomment-381047051
        # Check your memory is shared and big enough
        raise ScreenshotUnavailable(url=url, status_code=None)

    # 'content' may be present but null, so normalise before stripping
    if not (x.get('content') or '').strip():
        raise EmptyReply(url=url, status_code=None)

    if x.get('status_code', 200) != 200 and not ignore_status_codes:
        raise Non200ErrorCodeReceived(url=url, status_code=x.get('status_code', 200), page_html=x['content'])

    self.content = x.get('content')
    # Keep headers a dict even when the reply has none, so header lookups do not fail on None
    self.headers = x.get('headers') or {}
    self.instock_data = x.get('instock_data')
    self.screenshot = base64.b64decode(x.get('screenshot'))
    self.status_code = x.get('status_code')
    self.xpath_data = x.get('xpath_data')
|
||||||
|
|
||||||
def run(self,
|
def run(self,
|
||||||
url,
|
url,
|
||||||
timeout,
|
timeout,
|
||||||
@@ -278,6 +408,24 @@ class base_html_playwright(Fetcher):
|
|||||||
current_include_filters=None,
|
current_include_filters=None,
|
||||||
is_binary=False):
|
is_binary=False):
|
||||||
|
|
||||||
|
# For now, USE_EXPERIMENTAL_PUPPETEER_FETCH is not supported by watches with BrowserSteps (for now!)
|
||||||
|
has_browser_steps = self.browser_steps and list(filter(
|
||||||
|
lambda s: (s['operation'] and len(s['operation']) and s['operation'] != 'Choose one' and s['operation'] != 'Goto site'),
|
||||||
|
self.browser_steps))
|
||||||
|
|
||||||
|
if not has_browser_steps:
|
||||||
|
if os.getenv('USE_EXPERIMENTAL_PUPPETEER_FETCH'):
|
||||||
|
# Temporary backup solution until we rewrite the playwright code
|
||||||
|
return self.run_fetch_browserless_puppeteer(
|
||||||
|
url,
|
||||||
|
timeout,
|
||||||
|
request_headers,
|
||||||
|
request_body,
|
||||||
|
request_method,
|
||||||
|
ignore_status_codes,
|
||||||
|
current_include_filters,
|
||||||
|
is_binary)
|
||||||
|
|
||||||
from playwright.sync_api import sync_playwright
|
from playwright.sync_api import sync_playwright
|
||||||
import playwright._impl._api_types
|
import playwright._impl._api_types
|
||||||
|
|
||||||
@@ -294,7 +442,7 @@ class base_html_playwright(Fetcher):
|
|||||||
# Set user agent to prevent Cloudflare from blocking the browser
|
# Set user agent to prevent Cloudflare from blocking the browser
|
||||||
# Use the default one configured in the App.py model that's passed from fetch_site_status.py
|
# Use the default one configured in the App.py model that's passed from fetch_site_status.py
|
||||||
context = browser.new_context(
|
context = browser.new_context(
|
||||||
user_agent=request_headers['User-Agent'] if request_headers.get('User-Agent') else 'Mozilla/5.0',
|
user_agent=request_headers.get('User-Agent', 'Mozilla/5.0'),
|
||||||
proxy=self.proxy,
|
proxy=self.proxy,
|
||||||
# This is needed to enable JavaScript execution on GitHub and others
|
# This is needed to enable JavaScript execution on GitHub and others
|
||||||
bypass_csp=True,
|
bypass_csp=True,
|
||||||
@@ -324,12 +472,12 @@ class base_html_playwright(Fetcher):
|
|||||||
except playwright._impl._api_types.Error as e:
|
except playwright._impl._api_types.Error as e:
|
||||||
# Retry once - https://github.com/browserless/chrome/issues/2485
|
# Retry once - https://github.com/browserless/chrome/issues/2485
|
||||||
# Sometimes errors related to invalid cert's and other can be random
|
# Sometimes errors related to invalid cert's and other can be random
|
||||||
print ("Content Fetcher > retrying request got error - ", str(e))
|
print("Content Fetcher > retrying request got error - ", str(e))
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
response = self.page.goto(url, wait_until='commit')
|
response = self.page.goto(url, wait_until='commit')
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print ("Content Fetcher > Other exception when page.goto", str(e))
|
print("Content Fetcher > Other exception when page.goto", str(e))
|
||||||
context.close()
|
context.close()
|
||||||
browser.close()
|
browser.close()
|
||||||
raise PageUnloadable(url=url, status_code=None, message=str(e))
|
raise PageUnloadable(url=url, status_code=None, message=str(e))
|
||||||
@@ -348,7 +496,7 @@ class base_html_playwright(Fetcher):
|
|||||||
# This can be ok, we will try to grab what we could retrieve
|
# This can be ok, we will try to grab what we could retrieve
|
||||||
pass
|
pass
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print ("Content Fetcher > Other exception when executing custom JS code", str(e))
|
print("Content Fetcher > Other exception when executing custom JS code", str(e))
|
||||||
context.close()
|
context.close()
|
||||||
browser.close()
|
browser.close()
|
||||||
raise PageUnloadable(url=url, status_code=None, message=str(e))
|
raise PageUnloadable(url=url, status_code=None, message=str(e))
|
||||||
@@ -356,7 +504,7 @@ class base_html_playwright(Fetcher):
|
|||||||
if response is None:
|
if response is None:
|
||||||
context.close()
|
context.close()
|
||||||
browser.close()
|
browser.close()
|
||||||
print ("Content Fetcher > Response object was none")
|
print("Content Fetcher > Response object was none")
|
||||||
raise EmptyReply(url=url, status_code=None)
|
raise EmptyReply(url=url, status_code=None)
|
||||||
|
|
||||||
# Run Browser Steps here
|
# Run Browser Steps here
|
||||||
@@ -370,7 +518,7 @@ class base_html_playwright(Fetcher):
|
|||||||
if len(self.page.content().strip()) == 0:
|
if len(self.page.content().strip()) == 0:
|
||||||
context.close()
|
context.close()
|
||||||
browser.close()
|
browser.close()
|
||||||
print ("Content Fetcher > Content was empty")
|
print("Content Fetcher > Content was empty")
|
||||||
raise EmptyReply(url=url, status_code=response.status)
|
raise EmptyReply(url=url, status_code=response.status)
|
||||||
|
|
||||||
self.status_code = response.status
|
self.status_code = response.status
|
||||||
@@ -382,7 +530,8 @@ class base_html_playwright(Fetcher):
|
|||||||
else:
|
else:
|
||||||
self.page.evaluate("var include_filters=''")
|
self.page.evaluate("var include_filters=''")
|
||||||
|
|
||||||
self.xpath_data = self.page.evaluate("async () => {" + self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) + "}")
|
self.xpath_data = self.page.evaluate(
|
||||||
|
"async () => {" + self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) + "}")
|
||||||
self.instock_data = self.page.evaluate("async () => {" + self.instock_data_js + "}")
|
self.instock_data = self.page.evaluate("async () => {" + self.instock_data_js + "}")
|
||||||
|
|
||||||
# Bug 3 in Playwright screenshot handling
|
# Bug 3 in Playwright screenshot handling
|
||||||
@@ -394,7 +543,8 @@ class base_html_playwright(Fetcher):
|
|||||||
# acceptable screenshot quality here
|
# acceptable screenshot quality here
|
||||||
try:
|
try:
|
||||||
# The actual screenshot
|
# The actual screenshot
|
||||||
self.screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)))
|
self.screenshot = self.page.screenshot(type='jpeg', full_page=True,
|
||||||
|
quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
context.close()
|
context.close()
|
||||||
browser.close()
|
browser.close()
|
||||||
@@ -403,6 +553,7 @@ class base_html_playwright(Fetcher):
|
|||||||
context.close()
|
context.close()
|
||||||
browser.close()
|
browser.close()
|
||||||
|
|
||||||
|
|
||||||
class base_html_webdriver(Fetcher):
|
class base_html_webdriver(Fetcher):
|
||||||
if os.getenv("WEBDRIVER_URL"):
|
if os.getenv("WEBDRIVER_URL"):
|
||||||
fetcher_description = "WebDriver Chrome/Javascript via '{}'".format(os.getenv("WEBDRIVER_URL"))
|
fetcher_description = "WebDriver Chrome/Javascript via '{}'".format(os.getenv("WEBDRIVER_URL"))
|
||||||
|
|||||||
@@ -481,6 +481,10 @@ class globalSettingsApplicationForm(commonSettingsForm):
|
|||||||
global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
|
global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
|
||||||
ignore_whitespace = BooleanField('Ignore whitespace')
|
ignore_whitespace = BooleanField('Ignore whitespace')
|
||||||
password = SaltyPasswordField()
|
password = SaltyPasswordField()
|
||||||
|
pager_size = IntegerField('Pager size',
|
||||||
|
render_kw={"style": "width: 5em;"},
|
||||||
|
validators=[validators.NumberRange(min=0,
|
||||||
|
message="Should be atleast zero (disabled)")])
|
||||||
removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
|
removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
|
||||||
render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
|
render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
|
||||||
shared_diff_access = BooleanField('Allow access to view diff page when password is enabled', default=False, validators=[validators.Optional()])
|
shared_diff_access = BooleanField('Allow access to view diff page when password is enabled', default=False, validators=[validators.Optional()])
|
||||||
|
|||||||
@@ -137,12 +137,13 @@ def _get_stripped_text_from_json_match(match):
|
|||||||
def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None):
|
def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None):
|
||||||
stripped_text_from_html = False
|
stripped_text_from_html = False
|
||||||
|
|
||||||
# Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded <script type=ldjson>
|
# Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded within HTML tags
|
||||||
try:
|
try:
|
||||||
stripped_text_from_html = _parse_json(json.loads(content), json_filter)
|
stripped_text_from_html = _parse_json(json.loads(content), json_filter)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
|
|
||||||
# Foreach <script json></script> blob.. just return the first that matches json_filter
|
# Foreach <script json></script> blob.. just return the first that matches json_filter
|
||||||
|
# As a last resort, try to parse the whole <body>
|
||||||
s = []
|
s = []
|
||||||
soup = BeautifulSoup(content, 'html.parser')
|
soup = BeautifulSoup(content, 'html.parser')
|
||||||
|
|
||||||
@@ -150,32 +151,34 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
|
|||||||
bs_result = soup.findAll('script', {"type": "application/ld+json"})
|
bs_result = soup.findAll('script', {"type": "application/ld+json"})
|
||||||
else:
|
else:
|
||||||
bs_result = soup.findAll('script')
|
bs_result = soup.findAll('script')
|
||||||
|
bs_result += soup.findAll('body')
|
||||||
|
|
||||||
|
bs_jsons = []
|
||||||
if not bs_result:
|
|
||||||
raise JSONNotFound("No parsable JSON found in this document")
|
|
||||||
|
|
||||||
for result in bs_result:
|
for result in bs_result:
|
||||||
# Skip empty tags, and things that dont even look like JSON
|
# Skip empty tags, and things that dont even look like JSON
|
||||||
if not result.string or not '{' in result.string:
|
if not result.text or '{' not in result.text:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
try:
|
try:
|
||||||
json_data = json.loads(result.string)
|
json_data = json.loads(result.text)
|
||||||
|
bs_jsons.append(json_data)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
# Just skip it
|
# Skip objects which cannot be parsed
|
||||||
continue
|
continue
|
||||||
else:
|
|
||||||
stripped_text_from_html = _parse_json(json_data, json_filter)
|
if not bs_jsons:
|
||||||
if ensure_is_ldjson_info_type:
|
raise JSONNotFound("No parsable JSON found in this document")
|
||||||
# Could sometimes be list, string or something else random
|
|
||||||
if isinstance(json_data, dict):
|
for json_data in bs_jsons:
|
||||||
# If it has LD JSON 'key' @type, and @type is 'product', and something was found for the search
|
stripped_text_from_html = _parse_json(json_data, json_filter)
|
||||||
# (Some sites have multiple of the same ld+json @type='product', but some have the review part, some have the 'price' part)
|
if ensure_is_ldjson_info_type:
|
||||||
if json_data.get('@type', False) and json_data.get('@type','').lower() == ensure_is_ldjson_info_type.lower() and stripped_text_from_html:
|
# Could sometimes be list, string or something else random
|
||||||
break
|
if isinstance(json_data, dict):
|
||||||
elif stripped_text_from_html:
|
# If it has LD JSON 'key' @type, and @type is 'product', and something was found for the search
|
||||||
break
|
# (Some sites have multiple of the same ld+json @type='product', but some have the review part, some have the 'price' part)
|
||||||
|
if json_data.get('@type', False) and json_data.get('@type','').lower() == ensure_is_ldjson_info_type.lower() and stripped_text_from_html:
|
||||||
|
break
|
||||||
|
elif stripped_text_from_html:
|
||||||
|
break
|
||||||
|
|
||||||
if not stripped_text_from_html:
|
if not stripped_text_from_html:
|
||||||
# Re 265 - Just return an empty string when filter not found
|
# Re 265 - Just return an empty string when filter not found
|
||||||
|
|||||||
@@ -52,7 +52,8 @@ class import_url_list(Importer):
|
|||||||
|
|
||||||
# Flask wtform validators wont work with basic auth, use validators package
|
# Flask wtform validators wont work with basic auth, use validators package
|
||||||
# Up to 5000 per batch so we dont flood the server
|
# Up to 5000 per batch so we dont flood the server
|
||||||
if len(url) and validators.url(url.replace('source:', '')) and good < 5000:
|
# @todo validators.url failed on local hostnames (such as referring to ourselves when using browserless)
|
||||||
|
if len(url) and 'http' in url.lower() and good < 5000:
|
||||||
extras = None
|
extras = None
|
||||||
if processor:
|
if processor:
|
||||||
extras = {'processor': processor}
|
extras = {'processor': processor}
|
||||||
|
|||||||
@@ -23,25 +23,26 @@ class model(dict):
|
|||||||
'workers': int(getenv("DEFAULT_SETTINGS_REQUESTS_WORKERS", "10")), # Number of threads, lower is better for slow connections
|
'workers': int(getenv("DEFAULT_SETTINGS_REQUESTS_WORKERS", "10")), # Number of threads, lower is better for slow connections
|
||||||
},
|
},
|
||||||
'application': {
|
'application': {
|
||||||
|
# Custom notification content
|
||||||
'api_access_token_enabled': True,
|
'api_access_token_enabled': True,
|
||||||
'password': False,
|
|
||||||
'base_url' : None,
|
'base_url' : None,
|
||||||
'extract_title_as_title': False,
|
|
||||||
'empty_pages_are_a_change': False,
|
'empty_pages_are_a_change': False,
|
||||||
|
'extract_title_as_title': False,
|
||||||
'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"),
|
'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"),
|
||||||
'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT,
|
'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT,
|
||||||
'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum
|
'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum
|
||||||
'global_subtractive_selectors': [],
|
'global_subtractive_selectors': [],
|
||||||
'ignore_whitespace': True,
|
'ignore_whitespace': True,
|
||||||
'render_anchor_tag_content': False,
|
|
||||||
'notification_urls': [], # Apprise URL list
|
|
||||||
# Custom notification content
|
|
||||||
'notification_title': default_notification_title,
|
|
||||||
'notification_body': default_notification_body,
|
'notification_body': default_notification_body,
|
||||||
'notification_format': default_notification_format,
|
'notification_format': default_notification_format,
|
||||||
|
'notification_title': default_notification_title,
|
||||||
|
'notification_urls': [], # Apprise URL list
|
||||||
|
'pager_size': 50,
|
||||||
|
'password': False,
|
||||||
|
'render_anchor_tag_content': False,
|
||||||
'schema_version' : 0,
|
'schema_version' : 0,
|
||||||
'shared_diff_access': False,
|
'shared_diff_access': False,
|
||||||
'webdriver_delay': None # Extra delay in seconds before extracting text
|
'webdriver_delay': None , # Extra delay in seconds before extracting text
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -49,3 +50,15 @@ class model(dict):
|
|||||||
def __init__(self, *arg, **kw):
|
def __init__(self, *arg, **kw):
|
||||||
super(model, self).__init__(*arg, **kw)
|
super(model, self).__init__(*arg, **kw)
|
||||||
self.update(self.base_config)
|
self.update(self.base_config)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_headers_from_text_file(filepath):
    """Parse HTTP request headers from a plain-text file.

    Every useful line has the form ``Name: value``. Lines that start with
    ``#`` and lines that contain no colon at all are ignored.

    Only the FIRST colon separates the name from the value, so values that
    contain colons themselves (URLs, ``host:port``, timestamps) are kept
    intact instead of raising ``ValueError`` on unpacking.

    :param filepath: path of the text file to read.
    :return: dict mapping header name to header value, both stripped of
             surrounding whitespace. A name that appears more than once
             keeps the value of its last occurrence.
    """
    headers = {}
    with open(filepath, 'r') as f:
        # Iterate the file object directly; no need to load every line first
        for line in f:
            line = line.strip()
            if line.startswith('#') or ':' not in line:
                continue
            # maxsplit=1: everything after the first ':' belongs to the value
            name, value = line.split(':', 1)
            headers[name.strip()] = value.strip()

    return headers
|
||||||
@@ -473,6 +473,40 @@ class model(dict):
|
|||||||
# None is set
|
# None is set
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@property
def has_extra_headers_file(self):
    """Tell whether any extra-headers text file exists on disk for this watch.

    Looks first for ``headers.txt`` inside ``self.watch_data_dir``; if that
    is missing, looks for one ``headers-<tag>.txt`` per entry of
    ``self.all_tags`` inside the datastore path, where ``<tag>`` is the tag
    with every non-word character and underscore removed, lower-cased.

    :return: True as soon as one such file is found, otherwise False.
    """
    watch_headers = os.path.join(self.watch_data_dir, 'headers.txt')
    if os.path.isfile(watch_headers):
        return True

    # Lazily build the per-tag candidates so we stop at the first hit
    tag_header_files = (
        os.path.join(self.__datastore_path,
                     "headers-" + re.sub(r'[\W_]', '', tag).lower().strip() + ".txt")
        for tag in self.all_tags
    )
    return any(os.path.isfile(candidate) for candidate in tag_header_files)
|
||||||
|
|
||||||
|
def get_all_headers(self):
    """Collect the request headers for this watch, including on-disk extras.

    Starts from a copy of the watch's own ``headers`` entry, then merges in
    (later sources overwrite earlier ones):

    1. ``headers.txt`` inside ``self.watch_data_dir``
    2. one ``headers-<tag>.txt`` per entry of ``self.all_tags`` inside the
       datastore path, where ``<tag>`` is the tag with non-word characters
       and underscores removed, lower-cased.

    A file that is missing is skipped; a file that fails to read or parse
    is reported on stdout and skipped (best effort).

    :return: dict of header name to value.
    """
    from .App import parse_headers_from_text_file

    merged = self.get('headers', {}).copy()

    def candidate_files():
        # The per-watch file comes first, then one file per tag
        yield os.path.join(self.watch_data_dir, 'headers.txt')
        for tag in self.all_tags:
            tag_filename = "headers-" + re.sub(r'[\W_]', '', tag).lower().strip() + ".txt"
            yield os.path.join(self.__datastore_path, tag_filename)

    for filepath in candidate_files():
        try:
            if os.path.isfile(filepath):
                merged.update(parse_headers_from_text_file(filepath))
        except Exception as e:
            print(f"ERROR reading headers.txt at {filepath}", str(e))

    return merged
|
||||||
|
|
||||||
def get_last_fetched_before_filters(self):
|
def get_last_fetched_before_filters(self):
|
||||||
import brotli
|
import brotli
|
||||||
|
|||||||
@@ -12,6 +12,12 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
|||||||
name = 'Re-stock detection for single product pages'
|
name = 'Re-stock detection for single product pages'
|
||||||
description = 'Detects if the product goes back to in-stock'
|
description = 'Detects if the product goes back to in-stock'
|
||||||
|
|
||||||
|
class UnableToExtractRestockData(Exception):
    """Raised when the restock processor could not obtain restock data.

    Carries the status code reported by the fetcher so other parts of the
    app can include it when reporting the error.
    """

    def __init__(self, status_code):
        # Hand the code to Exception as well, so str()/repr() and tracebacks
        # are informative even when it is passed as a keyword argument
        super().__init__(status_code)
        # Set this so we can use it in other parts of the app
        self.status_code = status_code
|
||||||
|
|
||||||
class perform_site_check(difference_detection_processor):
|
class perform_site_check(difference_detection_processor):
|
||||||
screenshot = None
|
screenshot = None
|
||||||
xpath_data = None
|
xpath_data = None
|
||||||
@@ -105,7 +111,8 @@ class perform_site_check(difference_detection_processor):
|
|||||||
fetched_md5 = hashlib.md5(fetcher.instock_data.encode('utf-8')).hexdigest()
|
fetched_md5 = hashlib.md5(fetcher.instock_data.encode('utf-8')).hexdigest()
|
||||||
# 'Possibly in stock' comes from stock-not-in-stock.js when no string found above the fold.
|
# 'Possibly in stock' comes from stock-not-in-stock.js when no string found above the fold.
|
||||||
update_obj["in_stock"] = True if fetcher.instock_data == 'Possibly in stock' else False
|
update_obj["in_stock"] = True if fetcher.instock_data == 'Possibly in stock' else False
|
||||||
|
else:
|
||||||
|
raise UnableToExtractRestockData(status_code=fetcher.status_code)
|
||||||
|
|
||||||
# The main thing that all this at the moment comes down to :)
|
# The main thing that all this at the moment comes down to :)
|
||||||
changed_detected = False
|
changed_detected = False
|
||||||
|
|||||||
@@ -70,10 +70,9 @@ class perform_site_check(difference_detection_processor):
|
|||||||
# Unset any existing notification error
|
# Unset any existing notification error
|
||||||
update_obj = {'last_notification_error': False, 'last_error': False}
|
update_obj = {'last_notification_error': False, 'last_error': False}
|
||||||
|
|
||||||
extra_headers = watch.get('headers', [])
|
|
||||||
|
|
||||||
# Tweak the base config with the per-watch ones
|
# Tweak the base config with the per-watch ones
|
||||||
request_headers = deepcopy(self.datastore.data['settings']['headers'])
|
extra_headers = watch.get_all_headers()
|
||||||
|
request_headers = self.datastore.get_all_headers()
|
||||||
request_headers.update(extra_headers)
|
request_headers.update(extra_headers)
|
||||||
|
|
||||||
# https://github.com/psf/requests/issues/4525
|
# https://github.com/psf/requests/issues/4525
|
||||||
@@ -140,7 +139,7 @@ class perform_site_check(difference_detection_processor):
|
|||||||
self.xpath_data = fetcher.xpath_data
|
self.xpath_data = fetcher.xpath_data
|
||||||
|
|
||||||
# Track the content type
|
# Track the content type
|
||||||
update_obj['content_type'] = fetcher.headers.get('Content-Type', '')
|
update_obj['content_type'] = fetcher.get_all_headers().get('content-type', '').lower()
|
||||||
|
|
||||||
# Watches added automatically in the queue manager will skip if its the same checksum as the previous run
|
# Watches added automatically in the queue manager will skip if its the same checksum as the previous run
|
||||||
# Saves a lot of CPU
|
# Saves a lot of CPU
|
||||||
@@ -160,7 +159,7 @@ class perform_site_check(difference_detection_processor):
|
|||||||
# https://stackoverflow.com/questions/41817578/basic-method-chaining ?
|
# https://stackoverflow.com/questions/41817578/basic-method-chaining ?
|
||||||
# return content().textfilter().jsonextract().checksumcompare() ?
|
# return content().textfilter().jsonextract().checksumcompare() ?
|
||||||
|
|
||||||
is_json = 'application/json' in fetcher.headers.get('Content-Type', '')
|
is_json = 'application/json' in fetcher.get_all_headers().get('content-type', '').lower()
|
||||||
is_html = not is_json
|
is_html = not is_json
|
||||||
|
|
||||||
# source: support, basically treat it as plaintext
|
# source: support, basically treat it as plaintext
|
||||||
@@ -168,7 +167,7 @@ class perform_site_check(difference_detection_processor):
|
|||||||
is_html = False
|
is_html = False
|
||||||
is_json = False
|
is_json = False
|
||||||
|
|
||||||
if watch.is_pdf or 'application/pdf' in fetcher.headers.get('Content-Type', '').lower():
|
if watch.is_pdf or 'application/pdf' in fetcher.get_all_headers().get('content-type', '').lower():
|
||||||
from shutil import which
|
from shutil import which
|
||||||
tool = os.getenv("PDF_TO_HTML_TOOL", "pdftohtml")
|
tool = os.getenv("PDF_TO_HTML_TOOL", "pdftohtml")
|
||||||
if not which(tool):
|
if not which(tool):
|
||||||
@@ -236,7 +235,7 @@ class perform_site_check(difference_detection_processor):
|
|||||||
html_content = fetcher.content
|
html_content = fetcher.content
|
||||||
|
|
||||||
# If not JSON, and if it's not text/plain..
|
# If not JSON, and if it's not text/plain..
|
||||||
if 'text/plain' in fetcher.headers.get('Content-Type', '').lower():
|
if 'text/plain' in fetcher.get_all_headers().get('content-type', '').lower():
|
||||||
# Don't run get_text or xpath/css filters on plaintext
|
# Don't run get_text or xpath/css filters on plaintext
|
||||||
stripped_text_from_html = html_content
|
stripped_text_from_html = html_content
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -0,0 +1,183 @@
|
|||||||
|
module.exports = async ({page, context}) => {
|
||||||
|
|
||||||
|
var {
|
||||||
|
url,
|
||||||
|
execute_js,
|
||||||
|
user_agent,
|
||||||
|
extra_wait_ms,
|
||||||
|
req_headers,
|
||||||
|
include_filters,
|
||||||
|
xpath_element_js,
|
||||||
|
screenshot_quality,
|
||||||
|
proxy_username,
|
||||||
|
proxy_password,
|
||||||
|
disk_cache_dir,
|
||||||
|
no_cache_list,
|
||||||
|
block_url_list,
|
||||||
|
} = context;
|
||||||
|
|
||||||
|
await page.setBypassCSP(true)
|
||||||
|
await page.setExtraHTTPHeaders(req_headers);
|
||||||
|
await page.setUserAgent(user_agent);
|
||||||
|
// https://ourcodeworld.com/articles/read/1106/how-to-solve-puppeteer-timeouterror-navigation-timeout-of-30000-ms-exceeded
|
||||||
|
|
||||||
|
await page.setDefaultNavigationTimeout(0);
|
||||||
|
|
||||||
|
if (proxy_username) {
|
||||||
|
await page.authenticate({
|
||||||
|
username: proxy_username,
|
||||||
|
password: proxy_password
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
await page.setViewport({
|
||||||
|
width: 1024,
|
||||||
|
height: 768,
|
||||||
|
deviceScaleFactor: 1,
|
||||||
|
});
|
||||||
|
|
||||||
|
await page.setRequestInterception(true);
|
||||||
|
if (disk_cache_dir) {
|
||||||
|
console.log(">>>>>>>>>>>>>>> LOCAL DISK CACHE ENABLED <<<<<<<<<<<<<<<<<<<<<");
|
||||||
|
}
|
||||||
|
const fs = require('fs');
|
||||||
|
const crypto = require('crypto');
|
||||||
|
|
||||||
|
function file_is_expired(file_path) {
|
||||||
|
if (!fs.existsSync(file_path)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
var stats = fs.statSync(file_path);
|
||||||
|
const now_date = new Date();
|
||||||
|
const expire_seconds = 300;
|
||||||
|
if ((now_date / 1000) - (stats.mtime.getTime() / 1000) > expire_seconds) {
|
||||||
|
console.log("CACHE EXPIRED: " + file_path);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
page.on('request', async (request) => {
|
||||||
|
// General blocking of requests that waste traffic
|
||||||
|
if (block_url_list.some(substring => request.url().toLowerCase().includes(substring))) return request.abort();
|
||||||
|
|
||||||
|
if (disk_cache_dir) {
|
||||||
|
const url = request.url();
|
||||||
|
const key = crypto.createHash('md5').update(url).digest("hex");
|
||||||
|
const dir_path = disk_cache_dir + key.slice(0, 1) + '/' + key.slice(1, 2) + '/' + key.slice(2, 3) + '/';
|
||||||
|
|
||||||
|
// https://stackoverflow.com/questions/4482686/check-synchronously-if-file-directory-exists-in-node-js
|
||||||
|
|
||||||
|
if (fs.existsSync(dir_path + key)) {
|
||||||
|
console.log("* CACHE HIT , using - " + dir_path + key + " - " + url);
|
||||||
|
const cached_data = fs.readFileSync(dir_path + key);
|
||||||
|
// @todo headers can come from dir_path+key+".meta" json file
|
||||||
|
request.respond({
|
||||||
|
status: 200,
|
||||||
|
//contentType: 'text/html', //@todo
|
||||||
|
body: cached_data
|
||||||
|
});
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
request.continue();
|
||||||
|
});
|
||||||
|
|
||||||
|
|
||||||
|
if (disk_cache_dir) {
|
||||||
|
page.on('response', async (response) => {
|
||||||
|
const url = response.url();
|
||||||
|
// Basic filtering for sane responses
|
||||||
|
if (response.request().method() != 'GET' || response.request().resourceType() == 'xhr' || response.request().resourceType() == 'document' || response.status() != 200) {
|
||||||
|
console.log("Skipping (not useful) - Status:" + response.status() + " Method:" + response.request().method() + " ResourceType:" + response.request().resourceType() + " " + url);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (no_cache_list.some(substring => url.toLowerCase().includes(substring))) {
|
||||||
|
console.log("Skipping (no_cache_list) - " + url);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (url.toLowerCase().includes('data:')) {
|
||||||
|
console.log("Skipping (embedded-data) - " + url);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
response.buffer().then(buffer => {
|
||||||
|
if (buffer.length > 100) {
|
||||||
|
console.log("Cache - Saving " + response.request().method() + " - " + url + " - " + response.request().resourceType());
|
||||||
|
|
||||||
|
const key = crypto.createHash('md5').update(url).digest("hex");
|
||||||
|
const dir_path = disk_cache_dir + key.slice(0, 1) + '/' + key.slice(1, 2) + '/' + key.slice(2, 3) + '/';
|
||||||
|
|
||||||
|
if (!fs.existsSync(dir_path)) {
|
||||||
|
fs.mkdirSync(dir_path, {recursive: true})
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fs.existsSync(dir_path + key)) {
|
||||||
|
if (file_is_expired(dir_path + key)) {
|
||||||
|
fs.writeFileSync(dir_path + key, buffer);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
fs.writeFileSync(dir_path + key, buffer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
const r = await page.goto(url, {
|
||||||
|
waitUntil: 'load'
|
||||||
|
});
|
||||||
|
|
||||||
|
await page.waitForTimeout(1000);
|
||||||
|
await page.waitForTimeout(extra_wait_ms);
|
||||||
|
|
||||||
|
if (execute_js) {
|
||||||
|
await page.evaluate(execute_js);
|
||||||
|
await page.waitForTimeout(200);
|
||||||
|
}
|
||||||
|
|
||||||
|
var xpath_data;
|
||||||
|
var instock_data;
|
||||||
|
try {
|
||||||
|
// Not sure the best way here, in the future this should be a new package added to npm then run in browserless
|
||||||
|
// (Once the old playwright is removed)
|
||||||
|
xpath_data = await page.evaluate((include_filters) => {%xpath_scrape_code%}, include_filters);
|
||||||
|
instock_data = await page.evaluate(() => {%instock_scrape_code%});
|
||||||
|
} catch (e) {
|
||||||
|
console.log(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Protocol error (Page.captureScreenshot): Cannot take screenshot with 0 width can come from a proxy auth failure
|
||||||
|
// Wrap it here (for now)
|
||||||
|
|
||||||
|
var b64s = false;
|
||||||
|
try {
|
||||||
|
b64s = await page.screenshot({encoding: "base64", fullPage: true, quality: screenshot_quality, type: 'jpeg'});
|
||||||
|
} catch (e) {
|
||||||
|
console.log(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
// May fail on very large pages with 'WARNING: tile memory limits exceeded, some content may not draw'
|
||||||
|
if (!b64s) {
|
||||||
|
// @todo after text extract, we can place some overlay text with red background to say 'croppped'
|
||||||
|
console.error('ERROR: content-fetcher page was maybe too large for a screenshot, reverting to viewport only screenshot');
|
||||||
|
try {
|
||||||
|
b64s = await page.screenshot({encoding: "base64", quality: screenshot_quality, type: 'jpeg'});
|
||||||
|
} catch (e) {
|
||||||
|
console.log(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var html = await page.content();
|
||||||
|
return {
|
||||||
|
data: {
|
||||||
|
'content': html,
|
||||||
|
'headers': r.headers(),
|
||||||
|
'instock_data': instock_data,
|
||||||
|
'screenshot': b64s,
|
||||||
|
'status_code': r.status(),
|
||||||
|
'xpath_data': xpath_data
|
||||||
|
},
|
||||||
|
type: 'application/json',
|
||||||
|
};
|
||||||
|
};
|
||||||
@@ -10,6 +10,7 @@ function isItemInStock() {
|
|||||||
'brak na stanie',
|
'brak na stanie',
|
||||||
'brak w magazynie',
|
'brak w magazynie',
|
||||||
'coming soon',
|
'coming soon',
|
||||||
|
'currently have any tickets for this',
|
||||||
'currently unavailable',
|
'currently unavailable',
|
||||||
'en rupture de stock',
|
'en rupture de stock',
|
||||||
'item is no longer available',
|
'item is no longer available',
|
||||||
@@ -20,7 +21,9 @@ function isItemInStock() {
|
|||||||
'nicht zur verfügung',
|
'nicht zur verfügung',
|
||||||
'no disponible temporalmente',
|
'no disponible temporalmente',
|
||||||
'no longer in stock',
|
'no longer in stock',
|
||||||
|
'no tickets available',
|
||||||
'not available',
|
'not available',
|
||||||
|
'not currently available',
|
||||||
'not in stock',
|
'not in stock',
|
||||||
'notify me when available',
|
'notify me when available',
|
||||||
'não estamos a aceitar encomendas',
|
'não estamos a aceitar encomendas',
|
||||||
@@ -30,6 +33,8 @@ function isItemInStock() {
|
|||||||
'sold out',
|
'sold out',
|
||||||
'temporarily out of stock',
|
'temporarily out of stock',
|
||||||
'temporarily unavailable',
|
'temporarily unavailable',
|
||||||
|
'tickets unavailable',
|
||||||
|
'unavailable tickets',
|
||||||
'we do not currently have an estimate of when this product will be back in stock.',
|
'we do not currently have an estimate of when this product will be back in stock.',
|
||||||
'zur zeit nicht an lager',
|
'zur zeit nicht an lager',
|
||||||
];
|
];
|
||||||
|
|||||||
@@ -8,8 +8,14 @@
|
|||||||
// Some pages like https://www.londonstockexchange.com/stock/NCCL/ncondezi-energy-limited/analysis
|
// Some pages like https://www.londonstockexchange.com/stock/NCCL/ncondezi-energy-limited/analysis
|
||||||
// will automatically force a scroll somewhere, so include the position offset
|
// will automatically force a scroll somewhere, so include the position offset
|
||||||
// Lets hope the position doesnt change while we iterate the bbox's, but this is better than nothing
|
// Lets hope the position doesnt change while we iterate the bbox's, but this is better than nothing
|
||||||
|
var scroll_y = 0;
|
||||||
|
try {
|
||||||
|
scroll_y = +document.documentElement.scrollTop || document.body.scrollTop
|
||||||
|
} catch (e) {
|
||||||
|
console.log(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
var scroll_y=+document.documentElement.scrollTop || document.body.scrollTop
|
|
||||||
|
|
||||||
// Include the getXpath script directly, easier than fetching
|
// Include the getXpath script directly, easier than fetching
|
||||||
function getxpath(e) {
|
function getxpath(e) {
|
||||||
@@ -38,15 +44,15 @@ const findUpTag = (el) => {
|
|||||||
if (el.name !== undefined && el.name.length) {
|
if (el.name !== undefined && el.name.length) {
|
||||||
var proposed = el.tagName + "[name=" + el.name + "]";
|
var proposed = el.tagName + "[name=" + el.name + "]";
|
||||||
var proposed_element = window.document.querySelectorAll(proposed);
|
var proposed_element = window.document.querySelectorAll(proposed);
|
||||||
if(proposed_element.length) {
|
if (proposed_element.length) {
|
||||||
if (proposed_element.length === 1) {
|
if (proposed_element.length === 1) {
|
||||||
return proposed;
|
return proposed;
|
||||||
} else {
|
} else {
|
||||||
// Some sites change ID but name= stays the same, we can hit it if we know the index
|
// Some sites change ID but name= stays the same, we can hit it if we know the index
|
||||||
// Find all the elements that match and work out the input[n]
|
// Find all the elements that match and work out the input[n]
|
||||||
var n=Array.from(proposed_element).indexOf(el);
|
var n = Array.from(proposed_element).indexOf(el);
|
||||||
// Return a Playwright selector for nthinput[name=zipcode]
|
// Return a Playwright selector for nthinput[name=zipcode]
|
||||||
return proposed+" >> nth="+n;
|
return proposed + " >> nth=" + n;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -114,11 +114,11 @@ $(document).ready(function () {
|
|||||||
e.preventDefault()
|
e.preventDefault()
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// When the mouse moves we know which element it should be above
|
||||||
|
// mousedown will link that to the UI (select the right action, highlight etc)
|
||||||
$('#browsersteps-selector-canvas').bind('mousedown', function (e) {
|
$('#browsersteps-selector-canvas').bind('mousedown', function (e) {
|
||||||
// https://developer.mozilla.org/en-US/docs/Web/API/MouseEvent
|
// https://developer.mozilla.org/en-US/docs/Web/API/MouseEvent
|
||||||
e.preventDefault()
|
e.preventDefault()
|
||||||
console.log(e);
|
|
||||||
console.log("current xpath in index is " + current_selected_i);
|
|
||||||
last_click_xy = {'x': parseInt((1 / x_scale) * e.offsetX), 'y': parseInt((1 / y_scale) * e.offsetY)}
|
last_click_xy = {'x': parseInt((1 / x_scale) * e.offsetX), 'y': parseInt((1 / y_scale) * e.offsetY)}
|
||||||
process_selected(current_selected_i);
|
process_selected(current_selected_i);
|
||||||
current_selected_i = false;
|
current_selected_i = false;
|
||||||
@@ -132,6 +132,7 @@ $(document).ready(function () {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Debounce and find the current most 'interesting' element we are hovering above
|
||||||
$('#browsersteps-selector-canvas').bind('mousemove', function (e) {
|
$('#browsersteps-selector-canvas').bind('mousemove', function (e) {
|
||||||
if (!xpath_data) {
|
if (!xpath_data) {
|
||||||
return;
|
return;
|
||||||
@@ -151,41 +152,40 @@ $(document).ready(function () {
|
|||||||
current_selected_i = false;
|
current_selected_i = false;
|
||||||
// Reverse order - the most specific one should be deeper/"laster"
|
// Reverse order - the most specific one should be deeper/"laster"
|
||||||
// Basically, find the most 'deepest'
|
// Basically, find the most 'deepest'
|
||||||
//$('#browsersteps-selector-canvas').css('cursor', 'pointer');
|
var possible_elements = [];
|
||||||
for (var i = xpath_data['size_pos'].length; i !== 0; i--) {
|
xpath_data['size_pos'].forEach(function (item, index) {
|
||||||
// draw all of them? let them choose somehow?
|
|
||||||
var sel = xpath_data['size_pos'][i - 1];
|
|
||||||
// If we are in a bounding-box
|
// If we are in a bounding-box
|
||||||
if (e.offsetY > sel.top * y_scale && e.offsetY < sel.top * y_scale + sel.height * y_scale
|
if (e.offsetY > item.top * y_scale && e.offsetY < item.top * y_scale + item.height * y_scale
|
||||||
&&
|
&&
|
||||||
e.offsetX > sel.left * y_scale && e.offsetX < sel.left * y_scale + sel.width * y_scale
|
e.offsetX > item.left * y_scale && e.offsetX < item.left * y_scale + item.width * y_scale
|
||||||
|
|
||||||
) {
|
) {
|
||||||
// Only highlight these interesting types
|
// There could be many elements here, record them all and then we'll find out which is the most 'useful'
|
||||||
if (1) {
|
// (input, textarea, button, A etc)
|
||||||
ctx.strokeRect(sel.left * x_scale, sel.top * y_scale, sel.width * x_scale, sel.height * y_scale);
|
if (item.width < xpath_data['browser_width']) {
|
||||||
ctx.fillRect(sel.left * x_scale, sel.top * y_scale, sel.width * x_scale, sel.height * y_scale);
|
possible_elements.push(item);
|
||||||
current_selected_i = i - 1;
|
|
||||||
break;
|
|
||||||
|
|
||||||
// find the smallest one at this x,y
|
|
||||||
// does it mean sort the xpath list by size (w*h) i think so!
|
|
||||||
} else {
|
|
||||||
|
|
||||||
if (include_text_elements[0].checked === true) {
|
|
||||||
// blue one with background instead?
|
|
||||||
ctx.fillStyle = 'rgba(0,0,255, 0.1)';
|
|
||||||
ctx.strokeStyle = 'rgba(0,0,200, 0.7)';
|
|
||||||
$('#browsersteps-selector-canvas').css('cursor', 'grab');
|
|
||||||
ctx.strokeRect(sel.left * x_scale, sel.top * y_scale, sel.width * x_scale, sel.height * y_scale);
|
|
||||||
ctx.fillRect(sel.left * x_scale, sel.top * y_scale, sel.width * x_scale, sel.height * y_scale);
|
|
||||||
current_selected_i = i - 1;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Find the best one
|
||||||
|
if (possible_elements.length) {
|
||||||
|
possible_elements.forEach(function (item, index) {
|
||||||
|
if (["a", "input", "textarea", "button"].includes(item['tagName'])) {
|
||||||
|
current_selected_i = item;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!current_selected_i) {
|
||||||
|
current_selected_i = possible_elements[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
sel = xpath_data['size_pos'][current_selected_i];
|
||||||
|
ctx.strokeRect(current_selected_i.left * x_scale, current_selected_i.top * y_scale, current_selected_i.width * x_scale, current_selected_i.height * y_scale);
|
||||||
|
ctx.fillRect(current_selected_i.left * x_scale, current_selected_i.top * y_scale, current_selected_i.width * x_scale, current_selected_i.height * y_scale);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}.debounce(10));
|
}.debounce(10));
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -195,16 +195,16 @@ $(document).ready(function () {
|
|||||||
|
|
||||||
|
|
||||||
// callback for clicking on an xpath on the canvas
|
// callback for clicking on an xpath on the canvas
|
||||||
function process_selected(xpath_data_index) {
|
function process_selected(selected_in_xpath_list) {
|
||||||
found_something = false;
|
found_something = false;
|
||||||
var first_available = $("ul#browser_steps li.empty").first();
|
var first_available = $("ul#browser_steps li.empty").first();
|
||||||
|
|
||||||
|
|
||||||
if (xpath_data_index !== false) {
|
if (selected_in_xpath_list !== false) {
|
||||||
// Nothing focused, so fill in a new one
|
// Nothing focused, so fill in a new one
|
||||||
// if inpt type button or <button>
|
// if inpt type button or <button>
|
||||||
// from the top, find the next not used one and use it
|
// from the top, find the next not used one and use it
|
||||||
var x = xpath_data['size_pos'][xpath_data_index];
|
var x = selected_in_xpath_list;
|
||||||
console.log(x);
|
console.log(x);
|
||||||
if (x && first_available.length) {
|
if (x && first_available.length) {
|
||||||
// @todo will it let you click shit that has a layer ontop? probably not.
|
// @todo will it let you click shit that has a layer ontop? probably not.
|
||||||
@@ -214,26 +214,18 @@ $(document).ready(function () {
|
|||||||
$('input[placeholder="Value"]', first_available).addClass('ok').click().focus();
|
$('input[placeholder="Value"]', first_available).addClass('ok').click().focus();
|
||||||
found_something = true;
|
found_something = true;
|
||||||
} else {
|
} else {
|
||||||
if (x['isClickable'] || x['tagName'].startsWith('h') || x['tagName'] === 'a' || x['tagName'] === 'button' || x['tagtype'] === 'submit' || x['tagtype'] === 'checkbox' || x['tagtype'] === 'radio' || x['tagtype'] === 'li') {
|
// There's no good way (that I know) to find if this
|
||||||
|
// see https://stackoverflow.com/questions/446892/how-to-find-event-listeners-on-a-dom-node-in-javascript-or-in-debugging
|
||||||
|
// https://codepen.io/azaslavsky/pen/DEJVWv
|
||||||
|
|
||||||
|
// So we dont know if its really a clickable element or not :-(
|
||||||
|
// Assume it is - then we dont fill the pages with unreliable "Click X,Y" selections
|
||||||
|
// If you switch to "Click X,y" after an element here is setup, it will give the last co-ords anyway
|
||||||
|
//if (x['isClickable'] || x['tagName'].startsWith('h') || x['tagName'] === 'a' || x['tagName'] === 'button' || x['tagtype'] === 'submit' || x['tagtype'] === 'checkbox' || x['tagtype'] === 'radio' || x['tagtype'] === 'li') {
|
||||||
$('select', first_available).val('Click element').change();
|
$('select', first_available).val('Click element').change();
|
||||||
$('input[type=text]', first_available).first().val(x['xpath']);
|
$('input[type=text]', first_available).first().val(x['xpath']);
|
||||||
found_something = true;
|
found_something = true;
|
||||||
}
|
//}
|
||||||
}
|
|
||||||
|
|
||||||
first_available.xpath_data_index = xpath_data_index;
|
|
||||||
|
|
||||||
if (!found_something) {
|
|
||||||
if (include_text_elements[0].checked === true) {
|
|
||||||
// Suggest that we use as filter?
|
|
||||||
// @todo filters should always be in the last steps, nothing non-filter after it
|
|
||||||
found_something = true;
|
|
||||||
ctx.strokeStyle = 'rgba(0,0,255, 0.9)';
|
|
||||||
ctx.fillStyle = 'rgba(0,0,255, 0.1)';
|
|
||||||
$('select', first_available).val('Extract text and use as filter').change();
|
|
||||||
$('input[type=text]', first_available).first().val(x['xpath']);
|
|
||||||
include_text_elements[0].checked = false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -248,7 +240,7 @@ $(document).ready(function () {
|
|||||||
|
|
||||||
function start() {
|
function start() {
|
||||||
console.log("Starting browser-steps UI");
|
console.log("Starting browser-steps UI");
|
||||||
browsersteps_session_id = Date.now();
|
browsersteps_session_id = false;
|
||||||
// @todo This setting of the first one should be done at the datalayer but wtforms doesnt wanna play nice
|
// @todo This setting of the first one should be done at the datalayer but wtforms doesnt wanna play nice
|
||||||
$('#browser_steps >li:first-child').removeClass('empty');
|
$('#browser_steps >li:first-child').removeClass('empty');
|
||||||
set_first_gotosite_disabled();
|
set_first_gotosite_disabled();
|
||||||
@@ -256,7 +248,7 @@ $(document).ready(function () {
|
|||||||
$('.clear,.remove', $('#browser_steps >li:first-child')).hide();
|
$('.clear,.remove', $('#browser_steps >li:first-child')).hide();
|
||||||
$.ajax({
|
$.ajax({
|
||||||
type: "GET",
|
type: "GET",
|
||||||
url: browser_steps_sync_url + "&browsersteps_session_id=" + browsersteps_session_id,
|
url: browser_steps_start_url,
|
||||||
statusCode: {
|
statusCode: {
|
||||||
400: function () {
|
400: function () {
|
||||||
// More than likely the CSRF token was lost when the server restarted
|
// More than likely the CSRF token was lost when the server restarted
|
||||||
@@ -264,12 +256,12 @@ $(document).ready(function () {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}).done(function (data) {
|
}).done(function (data) {
|
||||||
xpath_data = data.xpath_data;
|
|
||||||
$("#loading-status-text").fadeIn();
|
$("#loading-status-text").fadeIn();
|
||||||
|
browsersteps_session_id = data.browsersteps_session_id;
|
||||||
// This should trigger 'Goto site'
|
// This should trigger 'Goto site'
|
||||||
console.log("Got startup response, requesting Goto-Site (first) step fake click");
|
console.log("Got startup response, requesting Goto-Site (first) step fake click");
|
||||||
$('#browser_steps >li:first-child .apply').click();
|
$('#browser_steps >li:first-child .apply').click();
|
||||||
browserless_seconds_remaining = data.browser_time_remaining;
|
browserless_seconds_remaining = 500;
|
||||||
set_first_gotosite_disabled();
|
set_first_gotosite_disabled();
|
||||||
}).fail(function (data) {
|
}).fail(function (data) {
|
||||||
console.log(data);
|
console.log(data);
|
||||||
@@ -430,7 +422,6 @@ $(document).ready(function () {
|
|||||||
apply_buttons_disabled = false;
|
apply_buttons_disabled = false;
|
||||||
$("#browsersteps-img").css('opacity', 1);
|
$("#browsersteps-img").css('opacity', 1);
|
||||||
$('ul#browser_steps li .control .apply').css('opacity', 1);
|
$('ul#browser_steps li .control .apply').css('opacity', 1);
|
||||||
browserless_seconds_remaining = data.browser_time_remaining;
|
|
||||||
$("#loading-status-text").hide();
|
$("#loading-status-text").hide();
|
||||||
set_first_gotosite_disabled();
|
set_first_gotosite_disabled();
|
||||||
}).fail(function (data) {
|
}).fail(function (data) {
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ window.addEventListener('hashchange', function () {
|
|||||||
var has_errors = document.querySelectorAll(".messages .error");
|
var has_errors = document.querySelectorAll(".messages .error");
|
||||||
if (!has_errors.length) {
|
if (!has_errors.length) {
|
||||||
if (document.location.hash == "") {
|
if (document.location.hash == "") {
|
||||||
document.querySelector(".tabs ul li:first-child a").click();
|
location.replace(document.querySelector(".tabs ul li:first-child a").hash);
|
||||||
} else {
|
} else {
|
||||||
set_active_tab();
|
set_active_tab();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
* Toggles theme between light and dark mode.
|
* Toggles theme between light and dark mode.
|
||||||
*/
|
*/
|
||||||
$(document).ready(function () {
|
$(document).ready(function () {
|
||||||
const button = document.getElementsByClassName("toggle-theme")[0];
|
const button = document.getElementById("toggle-light-mode");
|
||||||
|
|
||||||
button.onclick = () => {
|
button.onclick = () => {
|
||||||
const htmlElement = document.getElementsByTagName("html");
|
const htmlElement = document.getElementsByTagName("html");
|
||||||
@@ -21,4 +21,33 @@ $(document).ready(function () {
|
|||||||
const setCookieValue = (value) => {
|
const setCookieValue = (value) => {
|
||||||
document.cookie = `css_dark_mode=${value};max-age=31536000;path=/`
|
document.cookie = `css_dark_mode=${value};max-age=31536000;path=/`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Search input box behaviour
|
||||||
|
const toggle_search = document.getElementById("toggle-search");
|
||||||
|
const search_q = document.getElementById("search-q");
|
||||||
|
window.addEventListener('keydown', function (e) {
|
||||||
|
|
||||||
|
if (e.altKey == true && e.keyCode == 83)
|
||||||
|
search_q.classList.toggle('expanded');
|
||||||
|
search_q.focus();
|
||||||
|
});
|
||||||
|
|
||||||
|
|
||||||
|
search_q.onkeydown = (e) => {
|
||||||
|
var key = e.keyCode || e.which;
|
||||||
|
if (key === 13) {
|
||||||
|
document.searchForm.submit();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
toggle_search.onclick = () => {
|
||||||
|
// Could be that they want to search something once text is in there
|
||||||
|
if (search_q.value.length) {
|
||||||
|
document.searchForm.submit();
|
||||||
|
} else {
|
||||||
|
// If not..
|
||||||
|
search_q.classList.toggle('expanded');
|
||||||
|
search_q.focus();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -61,7 +61,12 @@ $(document).ready(function () {
|
|||||||
function bootstrap_visualselector() {
|
function bootstrap_visualselector() {
|
||||||
if (1) {
|
if (1) {
|
||||||
// bootstrap it, this will trigger everything else
|
// bootstrap it, this will trigger everything else
|
||||||
$("img#selector-background").bind('load', function () {
|
$("img#selector-background").on("error", function () {
|
||||||
|
$('.fetching-update-notice').html("<strong>Ooops!</strong> The VisualSelector tool needs atleast one fetched page, please unpause the watch and/or wait for the watch to complete fetching and then reload this page.");
|
||||||
|
$('.fetching-update-notice').css('color','#bb0000');
|
||||||
|
$('#selector-current-xpath').hide();
|
||||||
|
$('#clear-selector').hide();
|
||||||
|
}).bind('load', function () {
|
||||||
console.log("Loaded background...");
|
console.log("Loaded background...");
|
||||||
c = document.getElementById("selector-canvas");
|
c = document.getElementById("selector-canvas");
|
||||||
// greyed out fill context
|
// greyed out fill context
|
||||||
@@ -79,10 +84,11 @@ $(document).ready(function () {
|
|||||||
}).attr("src", screenshot_url);
|
}).attr("src", screenshot_url);
|
||||||
}
|
}
|
||||||
// Tell visualSelector that the image should update
|
// Tell visualSelector that the image should update
|
||||||
var s = $("img#selector-background").attr('src')+"?"+ new Date().getTime();
|
var s = $("img#selector-background").attr('src') + "?" + new Date().getTime();
|
||||||
$("img#selector-background").attr('src',s)
|
$("img#selector-background").attr('src', s)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This is fired once the img src is loaded in bootstrap_visualselector()
|
||||||
function fetch_data() {
|
function fetch_data() {
|
||||||
// Image is ready
|
// Image is ready
|
||||||
$('.fetching-update-notice').html("Fetching element data..");
|
$('.fetching-update-notice').html("Fetching element data..");
|
||||||
@@ -99,7 +105,8 @@ $(document).ready(function () {
|
|||||||
reflow_selector();
|
reflow_selector();
|
||||||
$('.fetching-update-notice').fadeOut();
|
$('.fetching-update-notice').fadeOut();
|
||||||
});
|
});
|
||||||
};
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
function set_scale() {
|
function set_scale() {
|
||||||
|
|||||||
@@ -54,8 +54,47 @@ a.github-link {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
button.toggle-theme {
|
#toggle-light-mode {
|
||||||
width: 4rem;
|
width: 3rem;
|
||||||
|
.icon-dark {
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
&.dark {
|
||||||
|
.icon-light {
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.icon-dark {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#toggle-search {
|
||||||
|
width: 2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
#search-q {
|
||||||
|
opacity: 0;
|
||||||
|
-webkit-transition: all .9s ease;
|
||||||
|
-moz-transition: all .9s ease;
|
||||||
|
transition: all .9s ease;
|
||||||
|
width: 0;
|
||||||
|
display: none;
|
||||||
|
&.expanded {
|
||||||
|
width: auto;
|
||||||
|
display: inline-block;
|
||||||
|
|
||||||
|
opacity: 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#search-result-info {
|
||||||
|
color: #fff;
|
||||||
|
}
|
||||||
|
|
||||||
|
button.toggle-button {
|
||||||
|
vertical-align: middle;
|
||||||
background: transparent;
|
background: transparent;
|
||||||
border: none;
|
border: none;
|
||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
@@ -74,19 +113,7 @@ button.toggle-theme {
|
|||||||
display: block;
|
display: block;
|
||||||
}
|
}
|
||||||
|
|
||||||
.icon-dark {
|
|
||||||
display: none;
|
|
||||||
}
|
|
||||||
|
|
||||||
&.dark {
|
|
||||||
.icon-light {
|
|
||||||
display: none;
|
|
||||||
}
|
|
||||||
|
|
||||||
.icon-dark {
|
|
||||||
display: block;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
.pure-menu-horizontal {
|
.pure-menu-horizontal {
|
||||||
|
|||||||
@@ -331,23 +331,44 @@ a.github-link {
|
|||||||
a.github-link:hover {
|
a.github-link:hover {
|
||||||
color: var(--color-icon-github-hover); }
|
color: var(--color-icon-github-hover); }
|
||||||
|
|
||||||
button.toggle-theme {
|
#toggle-light-mode {
|
||||||
width: 4rem;
|
width: 3rem; }
|
||||||
|
#toggle-light-mode .icon-dark {
|
||||||
|
display: none; }
|
||||||
|
#toggle-light-mode.dark .icon-light {
|
||||||
|
display: none; }
|
||||||
|
#toggle-light-mode.dark .icon-dark {
|
||||||
|
display: block; }
|
||||||
|
|
||||||
|
#toggle-search {
|
||||||
|
width: 2rem; }
|
||||||
|
|
||||||
|
#search-q {
|
||||||
|
opacity: 0;
|
||||||
|
-webkit-transition: all .9s ease;
|
||||||
|
-moz-transition: all .9s ease;
|
||||||
|
transition: all .9s ease;
|
||||||
|
width: 0;
|
||||||
|
display: none; }
|
||||||
|
#search-q.expanded {
|
||||||
|
width: auto;
|
||||||
|
display: inline-block;
|
||||||
|
opacity: 1; }
|
||||||
|
|
||||||
|
#search-result-info {
|
||||||
|
color: #fff; }
|
||||||
|
|
||||||
|
button.toggle-button {
|
||||||
|
vertical-align: middle;
|
||||||
background: transparent;
|
background: transparent;
|
||||||
border: none;
|
border: none;
|
||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
color: var(--color-icon-github); }
|
color: var(--color-icon-github); }
|
||||||
button.toggle-theme:hover {
|
button.toggle-button:hover {
|
||||||
color: var(--color-icon-github-hover); }
|
color: var(--color-icon-github-hover); }
|
||||||
button.toggle-theme svg {
|
button.toggle-button svg {
|
||||||
fill: currentColor; }
|
fill: currentColor; }
|
||||||
button.toggle-theme .icon-light {
|
button.toggle-button .icon-light {
|
||||||
display: block; }
|
|
||||||
button.toggle-theme .icon-dark {
|
|
||||||
display: none; }
|
|
||||||
button.toggle-theme.dark .icon-light {
|
|
||||||
display: none; }
|
|
||||||
button.toggle-theme.dark .icon-dark {
|
|
||||||
display: block; }
|
display: block; }
|
||||||
|
|
||||||
.pure-menu-horizontal {
|
.pure-menu-horizontal {
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ from flask import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
from . model import App, Watch
|
from . model import App, Watch
|
||||||
from copy import deepcopy
|
from copy import deepcopy, copy
|
||||||
from os import path, unlink
|
from os import path, unlink
|
||||||
from threading import Lock
|
from threading import Lock
|
||||||
import json
|
import json
|
||||||
@@ -204,15 +204,16 @@ class ChangeDetectionStore:
|
|||||||
# GitHub #30 also delete history records
|
# GitHub #30 also delete history records
|
||||||
for uuid in self.data['watching']:
|
for uuid in self.data['watching']:
|
||||||
path = pathlib.Path(os.path.join(self.datastore_path, uuid))
|
path = pathlib.Path(os.path.join(self.datastore_path, uuid))
|
||||||
shutil.rmtree(path)
|
if os.path.exists(path):
|
||||||
self.needs_write_urgent = True
|
shutil.rmtree(path)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
path = pathlib.Path(os.path.join(self.datastore_path, uuid))
|
path = pathlib.Path(os.path.join(self.datastore_path, uuid))
|
||||||
shutil.rmtree(path)
|
if os.path.exists(path):
|
||||||
|
shutil.rmtree(path)
|
||||||
del self.data['watching'][uuid]
|
del self.data['watching'][uuid]
|
||||||
|
|
||||||
self.needs_write_urgent = True
|
self.needs_write_urgent = True
|
||||||
|
|
||||||
# Clone a watch by UUID
|
# Clone a watch by UUID
|
||||||
def clone(self, uuid):
|
def clone(self, uuid):
|
||||||
@@ -366,19 +367,21 @@ class ChangeDetectionStore:
|
|||||||
def save_error_text(self, watch_uuid, contents):
|
def save_error_text(self, watch_uuid, contents):
|
||||||
if not self.data['watching'].get(watch_uuid):
|
if not self.data['watching'].get(watch_uuid):
|
||||||
return
|
return
|
||||||
target_path = os.path.join(self.datastore_path, watch_uuid, "last-error.txt")
|
|
||||||
|
|
||||||
|
self.data['watching'][watch_uuid].ensure_data_dir_exists()
|
||||||
|
target_path = os.path.join(self.datastore_path, watch_uuid, "last-error.txt")
|
||||||
with open(target_path, 'w') as f:
|
with open(target_path, 'w') as f:
|
||||||
f.write(contents)
|
f.write(contents)
|
||||||
|
|
||||||
def save_xpath_data(self, watch_uuid, data, as_error=False):
|
def save_xpath_data(self, watch_uuid, data, as_error=False):
|
||||||
|
|
||||||
if not self.data['watching'].get(watch_uuid):
|
if not self.data['watching'].get(watch_uuid):
|
||||||
return
|
return
|
||||||
if as_error:
|
if as_error:
|
||||||
target_path = os.path.join(self.datastore_path, watch_uuid, "elements-error.json")
|
target_path = os.path.join(self.datastore_path, watch_uuid, "elements-error.json")
|
||||||
else:
|
else:
|
||||||
target_path = os.path.join(self.datastore_path, watch_uuid, "elements.json")
|
target_path = os.path.join(self.datastore_path, watch_uuid, "elements.json")
|
||||||
|
self.data['watching'][watch_uuid].ensure_data_dir_exists()
|
||||||
with open(target_path, 'w') as f:
|
with open(target_path, 'w') as f:
|
||||||
f.write(json.dumps(data))
|
f.write(json.dumps(data))
|
||||||
f.close()
|
f.close()
|
||||||
@@ -472,8 +475,6 @@ class ChangeDetectionStore:
|
|||||||
return proxy_list if len(proxy_list) else None
|
return proxy_list if len(proxy_list) else None
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def get_preferred_proxy_for_watch(self, uuid):
|
def get_preferred_proxy_for_watch(self, uuid):
|
||||||
"""
|
"""
|
||||||
Returns the preferred proxy by ID key
|
Returns the preferred proxy by ID key
|
||||||
@@ -505,6 +506,25 @@ class ChangeDetectionStore:
|
|||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
@property
|
||||||
|
def has_extra_headers_file(self):
|
||||||
|
filepath = os.path.join(self.datastore_path, 'headers.txt')
|
||||||
|
return os.path.isfile(filepath)
|
||||||
|
|
||||||
|
def get_all_headers(self):
|
||||||
|
from .model.App import parse_headers_from_text_file
|
||||||
|
headers = copy(self.data['settings'].get('headers', {}))
|
||||||
|
|
||||||
|
filepath = os.path.join(self.datastore_path, 'headers.txt')
|
||||||
|
try:
|
||||||
|
if os.path.isfile(filepath):
|
||||||
|
headers.update(parse_headers_from_text_file(filepath))
|
||||||
|
except Exception as e:
|
||||||
|
print(f"ERROR reading headers.txt at {filepath}", str(e))
|
||||||
|
|
||||||
|
return headers
|
||||||
|
|
||||||
|
|
||||||
# Run all updates
|
# Run all updates
|
||||||
# IMPORTANT - Each update could be run even when they have a new install and the schema is correct
|
# IMPORTANT - Each update could be run even when they have a new install and the schema is correct
|
||||||
# So therefor - each `update_n` should be very careful about checking if it needs to actually run
|
# So therefor - each `update_n` should be very careful about checking if it needs to actually run
|
||||||
|
|||||||
@@ -23,7 +23,7 @@
|
|||||||
<div class="notifications-wrapper">
|
<div class="notifications-wrapper">
|
||||||
<a id="send-test-notification" class="pure-button button-secondary button-xsmall" >Send test notification</a>
|
<a id="send-test-notification" class="pure-button button-secondary button-xsmall" >Send test notification</a>
|
||||||
{% if emailprefix %}
|
{% if emailprefix %}
|
||||||
<a id="add-email-helper" class="pure-button button-secondary button-xsmall" >Add email <img style="height: 1em; display: inline-block" src="{{url_for('static_content', group='images', filename='email.svg')}}" > </a>
|
<a id="add-email-helper" class="pure-button button-secondary button-xsmall" >Add email <img style="height: 1em; display: inline-block" src="{{url_for('static_content', group='images', filename='email.svg')}}" alt="Add an email address"> </a>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
<a href="{{url_for('notification_logs')}}" class="pure-button button-secondary button-xsmall" >Notification debug logs</a>
|
<a href="{{url_for('notification_logs')}}" class="pure-button button-secondary button-xsmall" >Notification debug logs</a>
|
||||||
</div>
|
</div>
|
||||||
@@ -115,7 +115,7 @@
|
|||||||
URLs generated by changedetection.io (such as <code>{{ '{{diff_url}}' }}</code>) require the <code>BASE_URL</code> environment variable set.<br>
|
URLs generated by changedetection.io (such as <code>{{ '{{diff_url}}' }}</code>) require the <code>BASE_URL</code> environment variable set.<br>
|
||||||
Your <code>BASE_URL</code> var is currently "{{settings_application['current_base_url']}}"
|
Your <code>BASE_URL</code> var is currently "{{settings_application['current_base_url']}}"
|
||||||
<br>
|
<br>
|
||||||
Warning: Contents of <code>{{ '{{diff}}' }}</code>, <code>{{ '{{diff_removed}}' }}</code>, and <code>{{ '{{diff_added}}' }}</code> depend on how the difference algorithm perceives the change. For example, an addition or removal could be perceived as a change in some cases. <a target="_new" href="https://github.com/dgtlmoon/changedetection.io/wiki/Using-the-%7B%7Bdiff%7D%7D,-%7B%7Bdiff_added%7D%7D,-and-%7B%7Bdiff_removal%7D%7D-notification-tokens">More Here</a> </br>
|
Warning: Contents of <code>{{ '{{diff}}' }}</code>, <code>{{ '{{diff_removed}}' }}</code>, and <code>{{ '{{diff_added}}' }}</code> depend on how the difference algorithm perceives the change. For example, an addition or removal could be perceived as a change in some cases. <a target="_new" href="https://github.com/dgtlmoon/changedetection.io/wiki/Using-the-%7B%7Bdiff%7D%7D,-%7B%7Bdiff_added%7D%7D,-and-%7B%7Bdiff_removed%7D%7D-notification-tokens">More Here</a> <br>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -30,7 +30,7 @@
|
|||||||
background-image: url({{url_for('static_content', group='images', filename='gradient-border.png') }});
|
background-image: url({{url_for('static_content', group='images', filename='gradient-border.png') }});
|
||||||
}
|
}
|
||||||
</style>
|
</style>
|
||||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
|
<script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
|
||||||
</head>
|
</head>
|
||||||
|
|
||||||
<body>
|
<body>
|
||||||
@@ -82,11 +82,21 @@
|
|||||||
<a href="{{url_for('logout')}}" class="pure-menu-link">LOG OUT</a>
|
<a href="{{url_for('logout')}}" class="pure-menu-link">LOG OUT</a>
|
||||||
</li>
|
</li>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
<li class="pure-menu-item pure-form" id="search-menu-item">
|
||||||
|
<!-- We use GET here so it offers people a chance to set bookmarks etc -->
|
||||||
|
<form name="searchForm" action="" method="GET">
|
||||||
|
<input id="search-q" class="" name="q" placeholder="URL or Title {% if active_tag %}in '{{ active_tag }}'{% endif %}" required="" type="text" value="">
|
||||||
|
<input name="tag" type="hidden" value="{% if active_tag %}{{active_tag}}{% endif %}">
|
||||||
|
<button class="toggle-button " id="toggle-search" type="button" title="Search, or Use Alt+S Key" >
|
||||||
|
{% include "svgs/search-icon.svg" %}
|
||||||
|
</button>
|
||||||
|
</form>
|
||||||
|
</li>
|
||||||
<li class="pure-menu-item">
|
<li class="pure-menu-item">
|
||||||
{% if dark_mode %}
|
{% if dark_mode %}
|
||||||
{% set darkClass = 'dark' %}
|
{% set darkClass = 'dark' %}
|
||||||
{% endif %}
|
{% endif %}
|
||||||
<button class="toggle-theme {{darkClass}}" type="button" title="Toggle Light/Dark Mode">
|
<button class="toggle-button {{darkClass}}" id ="toggle-light-mode" type="button" title="Toggle Light/Dark Mode">
|
||||||
<span class="visually-hidden">Toggle light/dark mode</span>
|
<span class="visually-hidden">Toggle light/dark mode</span>
|
||||||
<span class="icon-light">
|
<span class="icon-light">
|
||||||
{% include "svgs/light-mode-toggle-icon.svg" %}
|
{% include "svgs/light-mode-toggle-icon.svg" %}
|
||||||
@@ -106,7 +116,7 @@
|
|||||||
</div>
|
</div>
|
||||||
{% if hosted_sticky %}
|
{% if hosted_sticky %}
|
||||||
<div class="sticky-tab" id="hosted-sticky">
|
<div class="sticky-tab" id="hosted-sticky">
|
||||||
<a href="https://lemonade.changedetection.io/start?ref={{guid}}">Let us host your instance!</a>
|
<a href="https://changedetection.io/?ref={{guid}}">Let us host your instance!</a>
|
||||||
</div>
|
</div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% if left_sticky %}
|
{% if left_sticky %}
|
||||||
@@ -143,10 +153,7 @@
|
|||||||
{% endif %}
|
{% endif %}
|
||||||
{% block content %}{% endblock %}
|
{% block content %}{% endblock %}
|
||||||
</section>
|
</section>
|
||||||
<script
|
<script src="{{url_for('static_content', group='js', filename='toggle-theme.js')}}" defer></script>
|
||||||
type="text/javascript"
|
|
||||||
src="{{url_for('static_content', group='js', filename='toggle-theme.js')}}"
|
|
||||||
defer></script>
|
|
||||||
</body>
|
</body>
|
||||||
|
|
||||||
</html>
|
</html>
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
const error_screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid, error_screenshot=1) }}";
|
const error_screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid, error_screenshot=1) }}";
|
||||||
{% endif %}
|
{% endif %}
|
||||||
</script>
|
</script>
|
||||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='diff-overview.js')}}" defer></script>
|
<script src="{{url_for('static_content', group='js', filename='diff-overview.js')}}" defer></script>
|
||||||
|
|
||||||
<div id="settings">
|
<div id="settings">
|
||||||
<h1>Differences</h1>
|
<h1>Differences</h1>
|
||||||
@@ -51,7 +51,7 @@
|
|||||||
<a onclick="next_diff();">Jump</a>
|
<a onclick="next_diff();">Jump</a>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
|
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
|
||||||
<div class="tabs">
|
<div class="tabs">
|
||||||
<ul>
|
<ul>
|
||||||
{% if last_error_text %}<li class="tab" id="error-text-tab"><a href="#error-text">Error Text</a></li> {% endif %}
|
{% if last_error_text %}<li class="tab" id="error-text-tab"><a href="#error-text">Error Text</a></li> {% endif %}
|
||||||
@@ -149,9 +149,9 @@
|
|||||||
<script>
|
<script>
|
||||||
const newest_version_timestamp = {{newest_version_timestamp}};
|
const newest_version_timestamp = {{newest_version_timestamp}};
|
||||||
</script>
|
</script>
|
||||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='diff.min.js')}}"></script>
|
<script src="{{url_for('static_content', group='js', filename='diff.min.js')}}"></script>
|
||||||
|
|
||||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='diff-render.js')}}"></script>
|
<script src="{{url_for('static_content', group='js', filename='diff-render.js')}}"></script>
|
||||||
|
|
||||||
|
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
{% block content %}
|
{% block content %}
|
||||||
{% from '_helpers.jinja' import render_field, render_checkbox_field, render_button %}
|
{% from '_helpers.jinja' import render_field, render_checkbox_field, render_button %}
|
||||||
{% from '_common_fields.jinja' import render_common_settings_form %}
|
{% from '_common_fields.jinja' import render_common_settings_form %}
|
||||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
|
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
|
||||||
<script>
|
<script>
|
||||||
const notification_base_url="{{url_for('ajax_callback_send_notification_test')}}";
|
const notification_base_url="{{url_for('ajax_callback_send_notification_test')}}";
|
||||||
const watch_visual_selector_data_url="{{url_for('static_content', group='visual_selector_data', filename=uuid)}}";
|
const watch_visual_selector_data_url="{{url_for('static_content', group='visual_selector_data', filename=uuid)}}";
|
||||||
@@ -14,15 +14,17 @@
|
|||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
const browser_steps_config=JSON.parse('{{ browser_steps_config|tojson }}');
|
const browser_steps_config=JSON.parse('{{ browser_steps_config|tojson }}');
|
||||||
|
const browser_steps_start_url="{{url_for('browser_steps.browsersteps_start_session', uuid=uuid)}}";
|
||||||
const browser_steps_sync_url="{{url_for('browser_steps.browsersteps_ui_update', uuid=uuid)}}";
|
const browser_steps_sync_url="{{url_for('browser_steps.browsersteps_ui_update', uuid=uuid)}}";
|
||||||
|
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='watch-settings.js')}}" defer></script>
|
<script src="{{url_for('static_content', group='js', filename='watch-settings.js')}}" defer></script>
|
||||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='limit.js')}}" defer></script>
|
<script src="{{url_for('static_content', group='js', filename='limit.js')}}" defer></script>
|
||||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script>
|
<script src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script>
|
||||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='visual-selector.js')}}" defer></script>
|
<script src="{{url_for('static_content', group='js', filename='visual-selector.js')}}" defer></script>
|
||||||
{% if playwright_enabled %}
|
{% if playwright_enabled %}
|
||||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='browser-steps.js')}}" defer></script>
|
<script src="{{url_for('static_content', group='js', filename='browser-steps.js')}}" defer></script>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
<div class="edit-form monospaced-textarea">
|
<div class="edit-form monospaced-textarea">
|
||||||
@@ -150,6 +152,17 @@
|
|||||||
{{ render_field(form.headers, rows=5, placeholder="Example
|
{{ render_field(form.headers, rows=5, placeholder="Example
|
||||||
Cookie: foobar
|
Cookie: foobar
|
||||||
User-Agent: wonderbra 1.0") }}
|
User-Agent: wonderbra 1.0") }}
|
||||||
|
|
||||||
|
<div class="pure-form-message-inline">
|
||||||
|
{% if has_extra_headers_file %}
|
||||||
|
<strong>Alert! Extra headers file found and will be added to this watch!</strong>
|
||||||
|
{% else %}
|
||||||
|
Headers can be also read from a file in your data-directory <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Adding-headers-from-an-external-file">Read more here</a>
|
||||||
|
{% endif %}
|
||||||
|
<br>
|
||||||
|
(Not supported by Selenium browser)
|
||||||
|
</div>
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
<div class="pure-control-group" id="request-body">
|
<div class="pure-control-group" id="request-body">
|
||||||
{{ render_field(form.body, rows=5, placeholder="Example
|
{{ render_field(form.body, rows=5, placeholder="Example
|
||||||
@@ -163,7 +176,7 @@ User-Agent: wonderbra 1.0") }}
|
|||||||
</div>
|
</div>
|
||||||
{% if playwright_enabled %}
|
{% if playwright_enabled %}
|
||||||
<div class="tab-pane-inner" id="browser-steps">
|
<div class="tab-pane-inner" id="browser-steps">
|
||||||
<img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}">
|
<img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}" alt="New beta functionality">
|
||||||
<fieldset>
|
<fieldset>
|
||||||
<div class="pure-control-group">
|
<div class="pure-control-group">
|
||||||
<!--
|
<!--
|
||||||
@@ -186,11 +199,12 @@ User-Agent: wonderbra 1.0") }}
|
|||||||
<span class="loader" >
|
<span class="loader" >
|
||||||
<span id="browsersteps-click-start">
|
<span id="browsersteps-click-start">
|
||||||
<h2 >Click here to Start</h2>
|
<h2 >Click here to Start</h2>
|
||||||
Please allow 10-15 seconds for the browser to connect.
|
<svg style="height: 3.5rem;" version="1.1" viewBox="0 0 32 32" xml:space="preserve" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"><g id="Layer_1"/><g id="play_x5F_alt"><path d="M16,0C7.164,0,0,7.164,0,16s7.164,16,16,16s16-7.164,16-16S24.836,0,16,0z M10,24V8l16.008,8L10,24z" style="fill: var(--color-grey-400);"/></g></svg><br>
|
||||||
|
Please allow 10-15 seconds for the browser to connect.<br>
|
||||||
</span>
|
</span>
|
||||||
<div class="spinner" style="display: none;"></div>
|
<div class="spinner" style="display: none;"></div>
|
||||||
</span>
|
</span>
|
||||||
<img class="noselect" id="browsersteps-img" src="" style="max-width: 100%; width: 100%;" >
|
<img class="noselect" id="browsersteps-img" src="" style="max-width: 100%; width: 100%;" >
|
||||||
<canvas class="noselect" id="browsersteps-selector-canvas" style="max-width: 100%; width: 100%;"></canvas>
|
<canvas class="noselect" id="browsersteps-selector-canvas" style="max-width: 100%; width: 100%;"></canvas>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -390,7 +404,7 @@ Unavailable") }}
|
|||||||
|
|
||||||
{% if watch['processor'] == 'text_json_diff' %}
|
{% if watch['processor'] == 'text_json_diff' %}
|
||||||
<div class="tab-pane-inner visual-selector-ui" id="visualselector">
|
<div class="tab-pane-inner visual-selector-ui" id="visualselector">
|
||||||
<img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}">
|
<img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}" alt="New beta functionality">
|
||||||
|
|
||||||
<fieldset>
|
<fieldset>
|
||||||
<div class="pure-control-group">
|
<div class="pure-control-group">
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
{% extends 'base.html' %}
|
{% extends 'base.html' %}
|
||||||
{% block content %}
|
{% block content %}
|
||||||
{% from '_helpers.jinja' import render_field %}
|
{% from '_helpers.jinja' import render_field %}
|
||||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
|
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
|
||||||
<div class="edit-form monospaced-textarea">
|
<div class="edit-form monospaced-textarea">
|
||||||
|
|
||||||
<div class="tabs collapsable">
|
<div class="tabs collapsable">
|
||||||
|
|||||||
@@ -7,9 +7,9 @@
|
|||||||
const error_screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid, error_screenshot=1) }}";
|
const error_screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid, error_screenshot=1) }}";
|
||||||
{% endif %}
|
{% endif %}
|
||||||
</script>
|
</script>
|
||||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='diff-overview.js')}}" defer></script>
|
<script src="{{url_for('static_content', group='js', filename='diff-overview.js')}}" defer></script>
|
||||||
|
|
||||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
|
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
|
||||||
<div class="tabs">
|
<div class="tabs">
|
||||||
<ul>
|
<ul>
|
||||||
{% if last_error_text %}<li class="tab" id="error-text-tab"><a href="#error-text">Error Text</a></li> {% endif %}
|
{% if last_error_text %}<li class="tab" id="error-text-tab"><a href="#error-text">Error Text</a></li> {% endif %}
|
||||||
|
|||||||
@@ -9,10 +9,10 @@
|
|||||||
const email_notification_prefix=JSON.parse('{{emailprefix|tojson}}');
|
const email_notification_prefix=JSON.parse('{{emailprefix|tojson}}');
|
||||||
{% endif %}
|
{% endif %}
|
||||||
</script>
|
</script>
|
||||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
|
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
|
||||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script>
|
<script src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script>
|
||||||
|
|
||||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='global-settings.js')}}" defer></script>
|
<script src="{{url_for('static_content', group='js', filename='global-settings.js')}}" defer></script>
|
||||||
<div class="edit-form">
|
<div class="edit-form">
|
||||||
<div class="tabs collapsable">
|
<div class="tabs collapsable">
|
||||||
<ul>
|
<ul>
|
||||||
@@ -70,6 +70,10 @@
|
|||||||
<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Configurable-BASE_URL-setting">read more here</a>.
|
<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Configurable-BASE_URL-setting">read more here</a>.
|
||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
|
<div class="pure-control-group">
|
||||||
|
{{ render_field(form.application.form.pager_size) }}
|
||||||
|
<span class="pure-form-message-inline">Number of items per page in the watch overview list, 0 to disable.</span>
|
||||||
|
</div>
|
||||||
|
|
||||||
<div class="pure-control-group">
|
<div class="pure-control-group">
|
||||||
{{ render_checkbox_field(form.application.form.extract_title_as_title) }}
|
{{ render_checkbox_field(form.application.form.extract_title_as_title) }}
|
||||||
|
|||||||
@@ -0,0 +1 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?><svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 122.879 119.799" enable-background="new 0 0 122.879 119.799" xml:space="preserve"><g><path d="M49.988,0h0.016v0.007C63.803,0.011,76.298,5.608,85.34,14.652c9.027,9.031,14.619,21.515,14.628,35.303h0.007v0.033v0.04 h-0.007c-0.005,5.557-0.917,10.905-2.594,15.892c-0.281,0.837-0.575,1.641-0.877,2.409v0.007c-1.446,3.66-3.315,7.12-5.547,10.307 l29.082,26.139l0.018,0.016l0.157,0.146l0.011,0.011c1.642,1.563,2.536,3.656,2.649,5.78c0.11,2.1-0.543,4.248-1.979,5.971 l-0.011,0.016l-0.175,0.203l-0.035,0.035l-0.146,0.16l-0.016,0.021c-1.565,1.642-3.654,2.534-5.78,2.646 c-2.097,0.111-4.247-0.54-5.971-1.978l-0.015-0.011l-0.204-0.175l-0.029-0.024L78.761,90.865c-0.88,0.62-1.778,1.209-2.687,1.765 c-1.233,0.755-2.51,1.466-3.813,2.115c-6.699,3.342-14.269,5.222-22.272,5.222v0.007h-0.016v-0.007 c-13.799-0.004-26.296-5.601-35.338-14.645C5.605,76.291,0.016,63.805,0.007,50.021H0v-0.033v-0.016h0.007 c0.004-13.799,5.601-26.296,14.645-35.338C23.683,5.608,36.167,0.016,49.955,0.007V0H49.988L49.988,0z M50.004,11.21v0.007h-0.016 h-0.033V11.21c-10.686,0.007-20.372,4.35-27.384,11.359C15.56,29.578,11.213,39.274,11.21,49.973h0.007v0.016v0.033H11.21 c0.007,10.686,4.347,20.367,11.359,27.381c7.009,7.012,16.705,11.359,27.403,11.361v-0.007h0.016h0.033v0.007 c10.686-0.007,20.368-4.348,27.382-11.359c7.011-7.009,11.358-16.702,11.36-27.4h-0.006v-0.016v-0.033h0.006 c-0.006-10.686-4.35-20.372-11.358-27.384C70.396,15.56,60.703,11.213,50.004,11.21L50.004,11.21z"/></g></svg>
|
||||||
|
After Width: | Height: | Size: 1.6 KiB |
@@ -1,8 +1,8 @@
|
|||||||
{% extends 'base.html' %}
|
{% extends 'base.html' %}
|
||||||
{% block content %}
|
{% block content %}
|
||||||
{% from '_helpers.jinja' import render_simple_field, render_field %}
|
{% from '_helpers.jinja' import render_simple_field, render_field %}
|
||||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
|
<script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
|
||||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='watch-overview.js')}}" defer></script>
|
<script src="{{url_for('static_content', group='js', filename='watch-overview.js')}}" defer></script>
|
||||||
|
|
||||||
<div class="box">
|
<div class="box">
|
||||||
|
|
||||||
@@ -38,11 +38,13 @@
|
|||||||
<button class="pure-button button-secondary button-xsmall" name="op" value="recheck">Recheck</button>
|
<button class="pure-button button-secondary button-xsmall" name="op" value="recheck">Recheck</button>
|
||||||
<button class="pure-button button-secondary button-xsmall" name="op" value="mark-viewed">Mark viewed</button>
|
<button class="pure-button button-secondary button-xsmall" name="op" value="mark-viewed">Mark viewed</button>
|
||||||
<button class="pure-button button-secondary button-xsmall" name="op" value="notification-default">Use default notification</button>
|
<button class="pure-button button-secondary button-xsmall" name="op" value="notification-default">Use default notification</button>
|
||||||
<button class="pure-button button-secondary button-xsmall" style="background: #dd4242; font-size: 70%" name="op" value="delete">Delete</button>
|
<button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="clear-history">Clear/reset history</button>
|
||||||
|
<button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="delete">Delete</button>
|
||||||
</div>
|
</div>
|
||||||
{% if watches|length >= pagination.per_page %}
|
{% if watches|length >= pagination.per_page %}
|
||||||
{{ pagination.info }}
|
{{ pagination.info }}
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
{% if search_q %}<div id="search-result-info">Searching "<strong><i>{{search_q}}</i></strong>"</div>{% endif %}
|
||||||
<div>
|
<div>
|
||||||
<a href="{{url_for('index')}}" class="pure-button button-tag {{'active' if not active_tag }}">All</a>
|
<a href="{{url_for('index')}}" class="pure-button button-tag {{'active' if not active_tag }}">All</a>
|
||||||
{% for tag in tags %}
|
{% for tag in tags %}
|
||||||
@@ -72,8 +74,12 @@
|
|||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
|
{% if not watches|length %}
|
||||||
{% for watch in (watches|sort(attribute=sort_attribute, reverse=sort_order == 'asc'))[pagination.skip:pagination.skip+pagination.per_page] %}
|
<tr>
|
||||||
|
<td colspan="6">No website watches configured, please add a URL in the box above, or <a href="{{ url_for('import_page')}}" >import a list</a>.</td>
|
||||||
|
</tr>
|
||||||
|
{% endif %}
|
||||||
|
{% for watch in (watches|sort(attribute=sort_attribute, reverse=sort_order == 'asc'))|pagination_slice(skip=pagination.skip) %}
|
||||||
<tr id="{{ watch.uuid }}"
|
<tr id="{{ watch.uuid }}"
|
||||||
class="{{ loop.cycle('pure-table-odd', 'pure-table-even') }} processor-{{ watch['processor'] }}
|
class="{{ loop.cycle('pure-table-odd', 'pure-table-even') }} processor-{{ watch['processor'] }}
|
||||||
{% if watch.last_error is defined and watch.last_error != False %}error{% endif %}
|
{% if watch.last_error is defined and watch.last_error != False %}error{% endif %}
|
||||||
|
|||||||
@@ -14,13 +14,16 @@ global app
|
|||||||
|
|
||||||
def cleanup(datastore_path):
|
def cleanup(datastore_path):
|
||||||
# Unlink test output files
|
# Unlink test output files
|
||||||
files = ['output.txt',
|
files = [
|
||||||
'url-watches.json',
|
'count.txt',
|
||||||
'secret.txt',
|
'endpoint-content.txt'
|
||||||
'notification.txt',
|
'headers.txt',
|
||||||
'count.txt',
|
'headers-testtag.txt',
|
||||||
'endpoint-content.txt'
|
'notification.txt',
|
||||||
]
|
'secret.txt',
|
||||||
|
'url-watches.json',
|
||||||
|
'output.txt',
|
||||||
|
]
|
||||||
for file in files:
|
for file in files:
|
||||||
try:
|
try:
|
||||||
os.unlink("{}/{}".format(datastore_path, file))
|
os.unlink("{}/{}".format(datastore_path, file))
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
import time
|
import time
|
||||||
from flask import url_for, escape
|
from flask import url_for, escape
|
||||||
from . util import live_server_setup
|
from . util import live_server_setup, wait_for_all_checks
|
||||||
import pytest
|
import pytest
|
||||||
jq_support = True
|
jq_support = True
|
||||||
|
|
||||||
@@ -64,6 +64,24 @@ and it can also be repeated
|
|||||||
with pytest.raises(html_tools.JSONNotFound) as e_info:
|
with pytest.raises(html_tools.JSONNotFound) as e_info:
|
||||||
html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "jq:.id")
|
html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "jq:.id")
|
||||||
|
|
||||||
|
|
||||||
|
def test_unittest_inline_extract_body():
|
||||||
|
content = """
|
||||||
|
<html>
|
||||||
|
<head></head>
|
||||||
|
<body>
|
||||||
|
<pre style="word-wrap: break-word; white-space: pre-wrap;">
|
||||||
|
{"testKey": 42}
|
||||||
|
</pre>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
"""
|
||||||
|
from .. import html_tools
|
||||||
|
|
||||||
|
# See that we can find the second <script> one, which is not broken, and matches our filter
|
||||||
|
text = html_tools.extract_json_as_string(content, "json:$.testKey")
|
||||||
|
assert text == '42'
|
||||||
|
|
||||||
def set_original_ext_response():
|
def set_original_ext_response():
|
||||||
data = """
|
data = """
|
||||||
[
|
[
|
||||||
@@ -436,6 +454,37 @@ def test_ignore_json_order(client, live_server):
|
|||||||
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||||
assert b'Deleted' in res.data
|
assert b'Deleted' in res.data
|
||||||
|
|
||||||
|
def test_correct_header_detect(client, live_server):
|
||||||
|
# Like in https://github.com/dgtlmoon/changedetection.io/pull/1593
|
||||||
|
# Specify extra html that JSON is sometimes wrapped in - when using Browserless/Puppeteer etc
|
||||||
|
with open("test-datastore/endpoint-content.txt", "w") as f:
|
||||||
|
f.write('<html><body>{"hello" : 123, "world": 123}')
|
||||||
|
|
||||||
|
# Add our URL to the import page
|
||||||
|
# Check weird casing is cleaned up and detected also
|
||||||
|
test_url = url_for('test_endpoint', content_type="aPPlication/JSon", uppercase_headers=True, _external=True)
|
||||||
|
res = client.post(
|
||||||
|
url_for("import_page"),
|
||||||
|
data={"urls": test_url},
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
assert b"1 Imported" in res.data
|
||||||
|
wait_for_all_checks(client)
|
||||||
|
res = client.get(url_for("index"))
|
||||||
|
|
||||||
|
# Fixed in #1593
|
||||||
|
assert b'No parsable JSON found in this document' not in res.data
|
||||||
|
|
||||||
|
res = client.get(
|
||||||
|
url_for("preview_page", uuid="first"),
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
assert b'"world":' in res.data
|
||||||
|
assert res.data.count(b'{') >= 2
|
||||||
|
|
||||||
|
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||||
|
assert b'Deleted' in res.data
|
||||||
|
|
||||||
def test_check_jsonpath_ext_filter(client, live_server):
|
def test_check_jsonpath_ext_filter(client, live_server):
|
||||||
check_json_ext_filter('json:$[?(@.status==Sold)]', client, live_server)
|
check_json_ext_filter('json:$[?(@.status==Sold)]', client, live_server)
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,8 @@
|
|||||||
import json
|
import json
|
||||||
|
import os
|
||||||
import time
|
import time
|
||||||
from flask import url_for
|
from flask import url_for
|
||||||
from . util import set_original_response, set_modified_response, live_server_setup
|
from . util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_UUID_from_client
|
||||||
|
|
||||||
def test_setup(live_server):
|
def test_setup(live_server):
|
||||||
live_server_setup(live_server)
|
live_server_setup(live_server)
|
||||||
@@ -9,8 +10,12 @@ def test_setup(live_server):
|
|||||||
# Hard to just add more live server URLs when one test is already running (I think)
|
# Hard to just add more live server URLs when one test is already running (I think)
|
||||||
# So we add our test here (was in a different file)
|
# So we add our test here (was in a different file)
|
||||||
def test_headers_in_request(client, live_server):
|
def test_headers_in_request(client, live_server):
|
||||||
|
#live_server_setup(live_server)
|
||||||
# Add our URL to the import page
|
# Add our URL to the import page
|
||||||
test_url = url_for('test_headers', _external=True)
|
test_url = url_for('test_headers', _external=True)
|
||||||
|
if os.getenv('PLAYWRIGHT_DRIVER_URL'):
|
||||||
|
# Because its no longer calling back to localhost but from browserless, set in test-only.yml
|
||||||
|
test_url = test_url.replace('localhost', 'changedet')
|
||||||
|
|
||||||
# Add the test URL twice, we will check
|
# Add the test URL twice, we will check
|
||||||
res = client.post(
|
res = client.post(
|
||||||
@@ -29,7 +34,7 @@ def test_headers_in_request(client, live_server):
|
|||||||
)
|
)
|
||||||
assert b"1 Imported" in res.data
|
assert b"1 Imported" in res.data
|
||||||
|
|
||||||
time.sleep(3)
|
wait_for_all_checks(client)
|
||||||
cookie_header = '_ga=GA1.2.1022228332; cookie-preferences=analytics:accepted;'
|
cookie_header = '_ga=GA1.2.1022228332; cookie-preferences=analytics:accepted;'
|
||||||
|
|
||||||
|
|
||||||
@@ -39,7 +44,7 @@ def test_headers_in_request(client, live_server):
|
|||||||
data={
|
data={
|
||||||
"url": test_url,
|
"url": test_url,
|
||||||
"tag": "",
|
"tag": "",
|
||||||
"fetch_backend": "html_requests",
|
"fetch_backend": 'html_webdriver' if os.getenv('PLAYWRIGHT_DRIVER_URL') else 'html_requests',
|
||||||
"headers": "xxx:ooo\ncool:yeah\r\ncookie:"+cookie_header},
|
"headers": "xxx:ooo\ncool:yeah\r\ncookie:"+cookie_header},
|
||||||
follow_redirects=True
|
follow_redirects=True
|
||||||
)
|
)
|
||||||
@@ -47,7 +52,7 @@ def test_headers_in_request(client, live_server):
|
|||||||
|
|
||||||
|
|
||||||
# Give the thread time to pick up the first version
|
# Give the thread time to pick up the first version
|
||||||
time.sleep(5)
|
wait_for_all_checks(client)
|
||||||
|
|
||||||
# The service should echo back the request headers
|
# The service should echo back the request headers
|
||||||
res = client.get(
|
res = client.get(
|
||||||
@@ -63,7 +68,7 @@ def test_headers_in_request(client, live_server):
|
|||||||
from html import escape
|
from html import escape
|
||||||
assert escape(cookie_header).encode('utf-8') in res.data
|
assert escape(cookie_header).encode('utf-8') in res.data
|
||||||
|
|
||||||
time.sleep(5)
|
wait_for_all_checks(client)
|
||||||
|
|
||||||
# Re #137 - Examine the JSON index file, it should have only one set of headers entered
|
# Re #137 - Examine the JSON index file, it should have only one set of headers entered
|
||||||
watches_with_headers = 0
|
watches_with_headers = 0
|
||||||
@@ -79,6 +84,9 @@ def test_headers_in_request(client, live_server):
|
|||||||
def test_body_in_request(client, live_server):
|
def test_body_in_request(client, live_server):
|
||||||
# Add our URL to the import page
|
# Add our URL to the import page
|
||||||
test_url = url_for('test_body', _external=True)
|
test_url = url_for('test_body', _external=True)
|
||||||
|
if os.getenv('PLAYWRIGHT_DRIVER_URL'):
|
||||||
|
# Because its no longer calling back to localhost but from browserless, set in test-only.yml
|
||||||
|
test_url = test_url.replace('localhost', 'cdio')
|
||||||
|
|
||||||
res = client.post(
|
res = client.post(
|
||||||
url_for("import_page"),
|
url_for("import_page"),
|
||||||
@@ -167,6 +175,9 @@ def test_body_in_request(client, live_server):
|
|||||||
def test_method_in_request(client, live_server):
|
def test_method_in_request(client, live_server):
|
||||||
# Add our URL to the import page
|
# Add our URL to the import page
|
||||||
test_url = url_for('test_method', _external=True)
|
test_url = url_for('test_method', _external=True)
|
||||||
|
if os.getenv('PLAYWRIGHT_DRIVER_URL'):
|
||||||
|
# Because its no longer calling back to localhost but from browserless, set in test-only.yml
|
||||||
|
test_url = test_url.replace('localhost', 'cdio')
|
||||||
|
|
||||||
# Add the test URL twice, we will check
|
# Add the test URL twice, we will check
|
||||||
res = client.post(
|
res = client.post(
|
||||||
@@ -234,3 +245,76 @@ def test_method_in_request(client, live_server):
|
|||||||
# Should be only one with method set to PATCH
|
# Should be only one with method set to PATCH
|
||||||
assert watches_with_method == 1
|
assert watches_with_method == 1
|
||||||
|
|
||||||
|
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||||
|
assert b'Deleted' in res.data
|
||||||
|
|
||||||
|
def test_headers_textfile_in_request(client, live_server):
|
||||||
|
#live_server_setup(live_server)
|
||||||
|
# Add our URL to the import page
|
||||||
|
test_url = url_for('test_headers', _external=True)
|
||||||
|
if os.getenv('PLAYWRIGHT_DRIVER_URL'):
|
||||||
|
# Because its no longer calling back to localhost but from browserless, set in test-only.yml
|
||||||
|
test_url = test_url.replace('localhost', 'cdio')
|
||||||
|
|
||||||
|
print ("TEST URL IS ",test_url)
|
||||||
|
# Add the test URL twice, we will check
|
||||||
|
res = client.post(
|
||||||
|
url_for("import_page"),
|
||||||
|
data={"urls": test_url},
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
assert b"1 Imported" in res.data
|
||||||
|
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
|
|
||||||
|
# Add some headers to a request
|
||||||
|
res = client.post(
|
||||||
|
url_for("edit_page", uuid="first"),
|
||||||
|
data={
|
||||||
|
"url": test_url,
|
||||||
|
"tag": "testtag",
|
||||||
|
"fetch_backend": 'html_webdriver' if os.getenv('PLAYWRIGHT_DRIVER_URL') else 'html_requests',
|
||||||
|
"headers": "xxx:ooo\ncool:yeah\r\n"},
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
assert b"Updated watch." in res.data
|
||||||
|
wait_for_all_checks(client)
|
||||||
|
|
||||||
|
with open('test-datastore/headers-testtag.txt', 'w') as f:
|
||||||
|
f.write("tag-header: test")
|
||||||
|
|
||||||
|
with open('test-datastore/headers.txt', 'w') as f:
|
||||||
|
f.write("global-header: nice\r\nnext-global-header: nice")
|
||||||
|
|
||||||
|
with open('test-datastore/'+extract_UUID_from_client(client)+'/headers.txt', 'w') as f:
|
||||||
|
f.write("watch-header: nice")
|
||||||
|
|
||||||
|
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||||
|
|
||||||
|
# Give the thread time to pick it up
|
||||||
|
wait_for_all_checks(client)
|
||||||
|
|
||||||
|
res = client.get(url_for("edit_page", uuid="first"))
|
||||||
|
assert b"Extra headers file found and will be added to this watch" in res.data
|
||||||
|
|
||||||
|
# Not needed anymore
|
||||||
|
os.unlink('test-datastore/headers.txt')
|
||||||
|
os.unlink('test-datastore/headers-testtag.txt')
|
||||||
|
os.unlink('test-datastore/'+extract_UUID_from_client(client)+'/headers.txt')
|
||||||
|
# The service should echo back the request verb
|
||||||
|
res = client.get(
|
||||||
|
url_for("preview_page", uuid="first"),
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
|
||||||
|
assert b"Global-Header:nice" in res.data
|
||||||
|
assert b"Next-Global-Header:nice" in res.data
|
||||||
|
assert b"Xxx:ooo" in res.data
|
||||||
|
assert b"Watch-Header:nice" in res.data
|
||||||
|
assert b"Tag-Header:test" in res.data
|
||||||
|
|
||||||
|
|
||||||
|
#unlink headers.txt on start/stop
|
||||||
|
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||||
|
assert b'Deleted' in res.data
|
||||||
@@ -119,16 +119,26 @@ def live_server_setup(live_server):
|
|||||||
status_code = request.args.get('status_code')
|
status_code = request.args.get('status_code')
|
||||||
content = request.args.get('content') or None
|
content = request.args.get('content') or None
|
||||||
|
|
||||||
|
# Used to just try to break the header detection
|
||||||
|
uppercase_headers = request.args.get('uppercase_headers')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if content is not None:
|
if content is not None:
|
||||||
resp = make_response(content, status_code)
|
resp = make_response(content, status_code)
|
||||||
resp.headers['Content-Type'] = ctype if ctype else 'text/html'
|
if uppercase_headers:
|
||||||
|
ctype=ctype.upper()
|
||||||
|
resp.headers['CONTENT-TYPE'] = ctype if ctype else 'text/html'
|
||||||
|
else:
|
||||||
|
resp.headers['Content-Type'] = ctype if ctype else 'text/html'
|
||||||
return resp
|
return resp
|
||||||
|
|
||||||
# Tried using a global var here but didn't seem to work, so reading from a file instead.
|
# Tried using a global var here but didn't seem to work, so reading from a file instead.
|
||||||
with open("test-datastore/endpoint-content.txt", "r") as f:
|
with open("test-datastore/endpoint-content.txt", "r") as f:
|
||||||
resp = make_response(f.read(), status_code)
|
resp = make_response(f.read(), status_code)
|
||||||
resp.headers['Content-Type'] = ctype if ctype else 'text/html'
|
if uppercase_headers:
|
||||||
|
resp.headers['CONTENT-TYPE'] = ctype if ctype else 'text/html'
|
||||||
|
else:
|
||||||
|
resp.headers['Content-Type'] = ctype if ctype else 'text/html'
|
||||||
return resp
|
return resp
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
return make_response('', status_code)
|
return make_response('', status_code)
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ import time
|
|||||||
|
|
||||||
from changedetectionio import content_fetcher
|
from changedetectionio import content_fetcher
|
||||||
from .processors.text_json_diff import FilterNotFoundInResponse
|
from .processors.text_json_diff import FilterNotFoundInResponse
|
||||||
|
from .processors.restock_diff import UnableToExtractRestockData
|
||||||
|
|
||||||
# A single update worker
|
# A single update worker
|
||||||
#
|
#
|
||||||
@@ -238,7 +238,7 @@ class update_worker(threading.Thread):
|
|||||||
if not self.datastore.data['watching'].get(uuid):
|
if not self.datastore.data['watching'].get(uuid):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
err_text = "Warning, no filters were found, no change detection ran."
|
err_text = "Warning, no filters were found, no change detection ran - Did the page change layout? update your Visual Filter if necessary."
|
||||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
|
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
|
||||||
|
|
||||||
# Only when enabled, send the notification
|
# Only when enabled, send the notification
|
||||||
@@ -262,6 +262,7 @@ class update_worker(threading.Thread):
|
|||||||
# Yes fine, so nothing todo, don't continue to process.
|
# Yes fine, so nothing todo, don't continue to process.
|
||||||
process_changedetection_results = False
|
process_changedetection_results = False
|
||||||
changed_detected = False
|
changed_detected = False
|
||||||
|
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': False})
|
||||||
|
|
||||||
except content_fetcher.BrowserStepsStepTimout as e:
|
except content_fetcher.BrowserStepsStepTimout as e:
|
||||||
|
|
||||||
@@ -315,7 +316,13 @@ class update_worker(threading.Thread):
|
|||||||
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
|
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
|
||||||
|
|
||||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||||
'last_check_status': e.status_code})
|
'last_check_status': e.status_code,
|
||||||
|
'has_ldjson_price_data': None})
|
||||||
|
process_changedetection_results = False
|
||||||
|
except UnableToExtractRestockData as e:
|
||||||
|
# Usually when fetcher.instock_data returns empty
|
||||||
|
self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
|
||||||
|
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': f"Unable to extract restock data for this page unfortunately. (Got code {e.status_code} from server)"})
|
||||||
process_changedetection_results = False
|
process_changedetection_results = False
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
|
self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
|
||||||
|
|||||||
Reference in New Issue
Block a user