Compare commits
63 Commits (0.48.01...abstracted)
| Author | SHA1 | Date |
|---|---|---|
| | 80434fa16a | |
| | db10422415 | |
| | 380e571ded | |
| | 1c2cfc37aa | |
| | 0634fe021d | |
| | 04934b6b3b | |
| | ff00417bc5 | |
| | 849c5b2293 | |
| | 4bf560256b | |
| | 7903b03a0c | |
| | 5e7c0880c1 | |
| | 957aef4ff3 | |
| | 8e9a83d8f4 | |
| | 5961838143 | |
| | 8cf4a8128b | |
| | 24c3bfe5ad | |
| | bdd9760f3c | |
| | e37467f649 | |
| | d42fdf0257 | |
| | 939fa86582 | |
| | b87c92b9e0 | |
| | 4d5535d72c | |
| | ad08219d03 | |
| | 82211eef82 | |
| | 5d9380609c | |
| | a8b3918fca | |
| | e83fb37fb6 | |
| | 6b99afe0f7 | |
| | 09ebc6ec63 | |
| | 6b1065502e | |
| | d4c470984a | |
| | 55da48f719 | |
| | dbd4adf23a | |
| | b1e700b3ff | |
| | 1c61b5a623 | |
| | e799a1cdcb | |
| | 938065db6f | |
| | 4f2d38ff49 | |
| | 8960f401b7 | |
| | 1c1f1c6f6b | |
| | a2a98811a5 | |
| | 5a0ef8fc01 | |
| | d90de0851d | |
| | 360b4f0d8b | |
| | 6fc04d7f1c | |
| | 66fb05527b | |
| | 202e47d728 | |
| | d67d396b88 | |
| | 05f54f0ce6 | |
| | 6adf10597e | |
| | 4419bc0e61 | |
| | f7e9846c9b | |
| | 5dea5e1def | |
| | 0fade0a473 | |
| | 121e9c20e0 | |
| | 12cec2d541 | |
| | d52e6e8e11 | |
| | bae1a89b75 | |
| | e49711f449 | |
| | a3a3ab0622 | |
| | c5fe188b28 | |
| | 1fb0adde54 | |
| | 2614b275f0 | |
.github/test/Dockerfile-alpine (vendored, 23 lines changed)

@@ -2,32 +2,33 @@
 # Test that we can still build on Alpine (musl modified libc https://musl.libc.org/)
 # Some packages wont install via pypi because they dont have a wheel available under this architecture.

-FROM ghcr.io/linuxserver/baseimage-alpine:3.18
+FROM ghcr.io/linuxserver/baseimage-alpine:3.21

 ENV PYTHONUNBUFFERED=1

 COPY requirements.txt /requirements.txt

 RUN \
   apk add --update --no-cache --virtual=build-dependencies \
     build-base \
     cargo \
     g++ \
     gcc \
     git \
     jpeg-dev \
     libc-dev \
     libffi-dev \
     libjpeg \
     libxslt-dev \
     make \
     openssl-dev \
     py3-wheel \
     python3-dev \
     zip \
     zlib-dev && \
   apk add --update --no-cache \
     libjpeg \
     libxslt \
-    python3 \
-    py3-pip && \
+    nodejs \
+    poppler-utils \
+    python3 && \
   echo "**** pip3 install test of changedetection.io ****" && \
-  pip3 install -U pip wheel setuptools && \
-  pip3 install -U --no-cache-dir --find-links https://wheel-index.linuxserver.io/alpine-3.18/ -r /requirements.txt && \
+  python3 -m venv /lsiopy && \
+  pip install -U pip wheel setuptools && \
+  pip install -U --no-cache-dir --find-links https://wheel-index.linuxserver.io/alpine-3.21/ -r /requirements.txt && \
   apk del --purge \
     build-dependencies
.github/workflows/containers.yml (vendored, 19 lines changed)

@@ -103,6 +103,19 @@ jobs:
         # provenance: false

+      # A new tagged release is required, which builds :tag and :latest
+      - name: Docker meta :tag
+        if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
+        uses: docker/metadata-action@v5
+        id: meta
+        with:
+          images: |
+            ${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io
+            ghcr.io/dgtlmoon/changedetection.io
+          tags: |
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}.{{minor}}
+            type=semver,pattern={{major}}
+
       - name: Build and push :tag
         id: docker_build_tag_release
         if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
@@ -111,11 +124,7 @@ jobs:
           context: ./
           file: ./Dockerfile
           push: true
-          tags: |
-            ${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:${{ github.event.release.tag_name }}
-            ghcr.io/dgtlmoon/changedetection.io:${{ github.event.release.tag_name }}
-            ${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:latest
-            ghcr.io/dgtlmoon/changedetection.io:latest
+          tags: ${{ steps.meta.outputs.tags }}
          platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8
           cache-from: type=gha
           cache-to: type=gha,mode=max
.github/workflows/pypi-release.yml (vendored, 7 lines changed)

@@ -45,9 +45,12 @@ jobs:
       - name: Test that the basic pip built package runs without error
         run: |
           set -ex
-          sudo pip3 install --upgrade pip
-          pip3 install dist/changedetection.io*.whl
+          ls -alR
+
+          # Find and install the first .whl file
+          find dist -type f -name "*.whl" -exec pip3 install {} \; -quit
           changedetection.io -d /tmp -p 10000 &
+
           sleep 3
           curl --retry-connrefused --retry 6 http://127.0.0.1:10000/static/styles/pure-min.css >/dev/null
           curl --retry-connrefused --retry 6 http://127.0.0.1:10000/ >/dev/null
@@ -64,14 +64,16 @@ jobs:
           echo "Running processes in docker..."
           docker ps

-      - name: Test built container with Pytest (generally as requests/plaintext fetching)
+      - name: Run Unit Tests
        run: |
           # Unit tests
           echo "run test with unittest"
           docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff'
           docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
           docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
+          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver'

+      - name: Test built container with Pytest (generally as requests/plaintext fetching)
         run: |
           # All tests
           echo "run test with pytest"
           # The default pytest logger_level is TRACE
@@ -5,6 +5,7 @@ recursive-include changedetectionio/content_fetchers *
 recursive-include changedetectionio/model *
 recursive-include changedetectionio/processors *
 recursive-include changedetectionio/static *
+recursive-include changedetectionio/storage *
 recursive-include changedetectionio/templates *
 recursive-include changedetectionio/tests *
 prune changedetectionio/static/package-lock.json
README.md (11 lines changed)

@@ -105,13 +105,22 @@ We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) glob

 Please :star: star :star: this project and help it grow! https://github.com/dgtlmoon/changedetection.io/

+### Schedule web page watches in any timezone, limit by day of week and time.
+
+Easily set a re-check schedule, for example you could limit the web page change detection to only operate during business hours.
+Or perhaps based on a foreign timezone (for example, you want to check for the latest news-headlines in a foreign country at 0900 AM),
+
+<img src="./docs/scheduler.png" style="max-width:80%;" alt="How to monitor web page changes according to a schedule" title="How to monitor web page changes according to a schedule" />
+
+Includes quick short-cut buttons to setup a schedule for **business hours only**, or **weekends**.
+
 ### We have a Chrome extension!

 Easily add the current web page to your changedetection.io tool, simply install the extension and click "Sync" to connect it to your existing changedetection.io install.

 [<img src="./docs/chrome-extension-screenshot.png" style="max-width:80%;" alt="Chrome Extension to easily add the current web-page to detect a change." title="Chrome Extension to easily add the current web-page to detect a change." />](https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop)

-[Goto the Chrome Webstore to download the extension.](https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop)
+[Goto the Chrome Webstore to download the extension.](https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop) ( Or check out the [GitHub repo](https://github.com/dgtlmoon/changedetection.io-browser-extension) )

 ## Installation
@@ -2,7 +2,7 @@

 # Read more https://github.com/dgtlmoon/changedetection.io/wiki

-__version__ = '0.48.01'
+__version__ = '0.49.4'

 from changedetectionio.strtobool import strtobool
 from json.decoder import JSONDecodeError
@@ -24,6 +24,9 @@ from loguru import logger
 app = None
 datastore = None

+def get_version():
+    return __version__
+
 # Parent wrapper or OS sends us a SIGTERM/SIGINT, do everything required for a clean shutdown
 def sigshutdown_handler(_signo, _stack_frame):
     global app
@@ -112,6 +112,35 @@ def build_watch_json_schema(d):

     schema['properties']['time_between_check'] = build_time_between_check_json_schema()

+    schema['properties']['browser_steps'] = {
+        "anyOf": [
+            {
+                "type": "array",
+                "items": {
+                    "type": "object",
+                    "properties": {
+                        "operation": {
+                            "type": ["string", "null"],
+                            "maxLength": 5000  # Allows null and any string up to 5000 chars (including "")
+                        },
+                        "selector": {
+                            "type": ["string", "null"],
+                            "maxLength": 5000
+                        },
+                        "optional_value": {
+                            "type": ["string", "null"],
+                            "maxLength": 5000
+                        }
+                    },
+                    "required": ["operation", "selector", "optional_value"],
+                    "additionalProperties": False  # No extra keys allowed
+                }
+            },
+            {"type": "null"},  # Allows null for `browser_steps`
+            {"type": "array", "maxItems": 0}  # Allows empty array []
+        ]
+    }
+
     # headers ?
     return schema
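The `browser_steps` schema above accepts a list of fully-specified step objects, `null`, or an empty list, and rejects unknown keys. A minimal sketch of that behaviour using the `jsonschema` package directly (the dict shape is copied from the diff; using `jsonschema` standalone like this is an assumption for illustration):

```python
from jsonschema import validate, ValidationError

browser_steps_schema = {
    "anyOf": [
        {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "operation": {"type": ["string", "null"], "maxLength": 5000},
                    "selector": {"type": ["string", "null"], "maxLength": 5000},
                    "optional_value": {"type": ["string", "null"], "maxLength": 5000},
                },
                "required": ["operation", "selector", "optional_value"],
                "additionalProperties": False,
            },
        },
        {"type": "null"},          # browser_steps may be null
        {"type": "array", "maxItems": 0},  # or an empty list []
    ]
}

# Accepted: a well-formed step, an empty list, or null
validate([{"operation": "Click element", "selector": "button", "optional_value": ""}],
         browser_steps_schema)
validate([], browser_steps_schema)
validate(None, browser_steps_schema)

# Rejected: unknown keys are not allowed (additionalProperties: False)
try:
    validate([{"operation": "Click element", "selector": "button",
               "optional_value": "", "extra": "nope"}], browser_steps_schema)
except ValidationError as e:
    print("rejected:", e.message)
```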
@@ -76,6 +76,7 @@ class Watch(Resource):
         # Return without history, get that via another API call
         # Properties are not returned as a JSON, so add the required props manually
         watch['history_n'] = watch.history_n
         # attr .last_changed will check for the last written text snapshot on change
         watch['last_changed'] = watch.last_changed
+        watch['viewed'] = watch.viewed
         return watch
@@ -1,3 +1,4 @@
+from changedetectionio import apprise_plugin
 import apprise

 # Create our AppriseAsset and populate it with some of our new values:
@@ -1,6 +1,8 @@
 # include the decorator
 from apprise.decorators import notify
+from loguru import logger
+from requests.structures import CaseInsensitiveDict

 @notify(on="delete")
 @notify(on="deletes")
@@ -13,70 +15,84 @@ from loguru import logger
 def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
     import requests
     import json
+    import re
     from urllib.parse import unquote_plus
-    from apprise.utils import parse_url as apprise_parse_url
-    from apprise import URLBase
+    from apprise.utils.parse import parse_url as apprise_parse_url

     url = kwargs['meta'].get('url')
+    schema = kwargs['meta'].get('schema').lower().strip()

-    if url.startswith('post'):
-        r = requests.post
-    elif url.startswith('get'):
-        r = requests.get
-    elif url.startswith('put'):
-        r = requests.put
-    elif url.startswith('delete'):
-        r = requests.delete
+    # Choose POST, GET etc from requests
+    method = re.sub(rf's$', '', schema)
+    requests_method = getattr(requests, method)

-    url = url.replace('post://', 'http://')
-    url = url.replace('posts://', 'https://')
-    url = url.replace('put://', 'http://')
-    url = url.replace('puts://', 'https://')
-    url = url.replace('get://', 'http://')
-    url = url.replace('gets://', 'https://')
-    url = url.replace('put://', 'http://')
-    url = url.replace('puts://', 'https://')
-    url = url.replace('delete://', 'http://')
-    url = url.replace('deletes://', 'https://')
-
     headers = {}
-    params = {}
+    params = CaseInsensitiveDict({})  # Added to requests
     auth = None
+    has_error = False

     # Convert /foobar?+some-header=hello to proper header dictionary
     results = apprise_parse_url(url)
-    if results:
-        # Add our headers that the user can potentially over-ride if they wish
-        # to to our returned result set and tidy entries by unquoting them
-        headers = {unquote_plus(x): unquote_plus(y)
-                   for x, y in results['qsd+'].items()}

-        # https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
-        # In Apprise, it relies on prefixing each request arg with "-", because it uses say &method=update as a flag for apprise
-        # but here we are making straight requests, so we need todo convert this against apprise's logic
-        for k, v in results['qsd'].items():
-            if not k.strip('+-') in results['qsd+'].keys():
-                params[unquote_plus(k)] = unquote_plus(v)
+    # Add our headers that the user can potentially over-ride if they wish
+    # to to our returned result set and tidy entries by unquoting them
+    headers = CaseInsensitiveDict({unquote_plus(x): unquote_plus(y)
+                                   for x, y in results['qsd+'].items()})

-        # Determine Authentication
-        auth = ''
-        if results.get('user') and results.get('password'):
-            auth = (unquote_plus(results.get('user')), unquote_plus(results.get('user')))
-        elif results.get('user'):
-            auth = (unquote_plus(results.get('user')))
+    # https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
+    # In Apprise, it relies on prefixing each request arg with "-", because it uses say &method=update as a flag for apprise
+    # but here we are making straight requests, so we need todo convert this against apprise's logic
+    for k, v in results['qsd'].items():
+        if not k.strip('+-') in results['qsd+'].keys():
+            params[unquote_plus(k)] = unquote_plus(v)

-    # Try to auto-guess if it's JSON
-    h = 'application/json; charset=utf-8'
+    # Determine Authentication
+    auth = ''
+    if results.get('user') and results.get('password'):
+        auth = (unquote_plus(results.get('user')), unquote_plus(results.get('user')))
+    elif results.get('user'):
+        auth = (unquote_plus(results.get('user')))

+    # If it smells like it could be JSON and no content-type was already set, offer a default content type.
+    if body and '{' in body[:100] and not headers.get('Content-Type'):
+        json_header = 'application/json; charset=utf-8'
+        try:
+            # Try if it's JSON
+            json.loads(body)
+            headers['Content-Type'] = json_header
+        except ValueError as e:
+            logger.warning(f"Could not automatically add '{json_header}' header to the notification because the document failed to parse as JSON: {e}")
+            pass

+    # POSTS -> HTTPS etc
+    if schema.lower().endswith('s'):
+        url = re.sub(rf'^{schema}', 'https', results.get('url'))
+    else:
+        url = re.sub(rf'^{schema}', 'http', results.get('url'))

+    status_str = ''
     try:
-        json.loads(body)
-        headers['Content-Type'] = h
-    except ValueError as e:
-        logger.warning(f"Could not automatically add '{h}' header to the {kwargs['meta'].get('schema')}:// notification because the document failed to parse as JSON: {e}")
-        pass
+        r = requests_method(url,
+                            auth=auth,
+                            data=body.encode('utf-8') if type(body) is str else body,
+                            headers=headers,
+                            params=params
+                            )

-    r(results.get('url'),
-      auth=auth,
-      data=body.encode('utf-8') if type(body) is str else body,
-      headers=headers,
-      params=params
-      )
+        if not (200 <= r.status_code < 300):
+            status_str = f"Error sending '{method.upper()}' request to {url} - Status: {r.status_code}: '{r.reason}'"
+            logger.error(status_str)
+            has_error = True
+        else:
+            logger.info(f"Sent '{method.upper()}' request to {url}")
+            has_error = False

+    except requests.RequestException as e:
+        status_str = f"Error sending '{method.upper()}' request to {url} - {str(e)}"
+        logger.error(status_str)
+        has_error = True

+    if has_error:
+        raise TypeError(status_str)

+    return True
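The rewrite above replaces the if/elif method table with a computed lookup: strip one trailing `s` (the TLS marker, e.g. `posts://` vs `post://`) from the custom scheme and fetch the matching function from `requests`. A standalone sketch of just that mapping:

```python
import re
import requests

def method_for_schema(schema: str):
    """Map an Apprise custom scheme like 'post', 'gets', 'deletes' to a
    requests function. A trailing 's' marks the HTTPS variant, so it is
    stripped before the lookup; requests exposes get/post/put/delete directly."""
    method = re.sub(r's$', '', schema.lower().strip())
    return getattr(requests, method)  # e.g. requests.post

# Example scheme values, as used by the custom notification URLs
assert method_for_schema('posts') is requests.post
assert method_for_schema('get') is requests.get
assert method_for_schema('deletes') is requests.delete
```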
@@ -22,7 +22,10 @@ from loguru import logger

 browsersteps_sessions = {}
 io_interface_context = None
+import json
+import base64
+import hashlib
+from flask import Response

 def construct_blueprint(datastore: ChangeDetectionStore):
     browser_steps_blueprint = Blueprint('browser_steps', __name__, template_folder="templates")

@@ -85,7 +88,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
         browsersteps_start_session['browserstepper'] = browser_steps.browsersteps_live_ui(
             playwright_browser=browsersteps_start_session['browser'],
             proxy=proxy,
-            start_url=datastore.data['watching'][watch_uuid].get('url'),
+            start_url=datastore.data['watching'][watch_uuid].link,
             headers=datastore.data['watching'][watch_uuid].get('headers')
         )

@@ -160,14 +163,13 @@ def construct_blueprint(datastore: ChangeDetectionStore):
         if not browsersteps_sessions.get(browsersteps_session_id):
             return make_response('No session exists under that ID', 500)

+        is_last_step = False
         # Actions - step/apply/etc, do the thing and return state
         if request.method == 'POST':
             # @todo - should always be an existing session
             step_operation = request.form.get('operation')
             step_selector = request.form.get('selector')
             step_optional_value = request.form.get('optional_value')
             step_n = int(request.form.get('step_n'))
             is_last_step = strtobool(request.form.get('is_last_step'))

             # @todo try.. accept.. nice errors not popups..
@@ -182,16 +184,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
                 # Try to find something of value to give back to the user
                 return make_response(str(e).splitlines()[0], 401)

-            # Get visual selector ready/update its data (also use the current filter info from the page?)
-            # When the last 'apply' button was pressed
-            # @todo this adds overhead because the xpath selection is happening twice
-            u = browsersteps_sessions[browsersteps_session_id]['browserstepper'].page.url
-            if is_last_step and u:
-                (screenshot, xpath_data) = browsersteps_sessions[browsersteps_session_id]['browserstepper'].request_visualselector_data()
-                watch = datastore.data['watching'].get(uuid)
-                if watch:
-                    watch.save_screenshot(screenshot=screenshot)
-                    watch.save_xpath_data(data=xpath_data)

         # if not this_session.page:
         #     cleanup_playwright_session()
@@ -199,31 +191,35 @@ def construct_blueprint(datastore: ChangeDetectionStore):

         # Screenshots and other info only needed on requesting a step (POST)
         try:
-            state = browsersteps_sessions[browsersteps_session_id]['browserstepper'].get_current_state()
+            (screenshot, xpath_data) = browsersteps_sessions[browsersteps_session_id]['browserstepper'].get_current_state()
+            if is_last_step:
+                watch = datastore.data['watching'].get(uuid)
+                u = browsersteps_sessions[browsersteps_session_id]['browserstepper'].page.url
+                if watch and u:
+                    watch.save_screenshot(screenshot=screenshot)
+                    watch.save_xpath_data(data=xpath_data)

         except playwright._impl._api_types.Error as e:
             return make_response("Browser session ran out of time :( Please reload this page."+str(e), 401)
         except Exception as e:
             return make_response("Error fetching screenshot and element data - " + str(e), 401)

-        # Use send_file() which is way faster than read/write loop on bytes
-        import json
-        from tempfile import mkstemp
-        from flask import send_file
-        tmp_fd, tmp_file = mkstemp(text=True, suffix=".json", prefix="changedetectionio-")
         # SEND THIS BACK TO THE BROWSER

-        output = json.dumps({'screenshot': "data:image/jpeg;base64,{}".format(
-            base64.b64encode(state[0]).decode('ascii')),
-            'xpath_data': state[1],
-            'session_age_start': browsersteps_sessions[browsersteps_session_id]['browserstepper'].age_start,
-            'browser_time_remaining': round(remaining)
-        })
+        output = {
+            "screenshot": f"data:image/jpeg;base64,{base64.b64encode(screenshot).decode('ascii')}",
+            "xpath_data": xpath_data,
+            "session_age_start": browsersteps_sessions[browsersteps_session_id]['browserstepper'].age_start,
+            "browser_time_remaining": round(remaining)
+        }
+        json_data = json.dumps(output)

-        with os.fdopen(tmp_fd, 'w') as f:
-            f.write(output)
+        # Generate an ETag (hash of the response body)
+        etag_hash = hashlib.md5(json_data.encode('utf-8')).hexdigest()

-        response = make_response(send_file(path_or_file=tmp_file,
-                                           mimetype='application/json; charset=UTF-8',
-                                           etag=True))
-        # No longer needed
-        os.unlink(tmp_file)
+        # Create the response with ETag
+        response = Response(json_data, mimetype="application/json; charset=UTF-8")
+        response.set_etag(etag_hash)

         return response
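The new response path writes the JSON once, hashes it for an ETag, and returns it directly instead of round-tripping through a temp file and send_file(). A minimal standalone Flask sketch of the same pattern (route and payload are illustrative, not from the changeset):

```python
import hashlib
import json

from flask import Flask, Response, request

app = Flask(__name__)

@app.route("/state")
def state():
    # Hypothetical payload standing in for the browser-steps state JSON
    payload = json.dumps({"screenshot": "...", "xpath_data": []})
    etag = hashlib.md5(payload.encode("utf-8")).hexdigest()

    # If the client already holds this exact body, reply 304 with no payload
    if request.if_none_match.contains(etag):
        return Response(status=304)

    response = Response(payload, mimetype="application/json; charset=UTF-8")
    response.set_etag(etag)
    return response
```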
@@ -1,14 +1,15 @@
 #!/usr/bin/env python3

 import os
 import time
 import re
 from random import randint
 from loguru import logger

+from changedetectionio.content_fetchers.helpers import capture_stitched_together_full_page, SCREENSHOT_SIZE_STITCH_THRESHOLD
 from changedetectionio.content_fetchers.base import manage_user_agent
 from changedetectionio.safe_jinja import render as jinja_render


 # Two flags, tell the JS which of the "Selector" or "Value" field should be enabled in the front end
 # 0- off, 1- on
 browser_step_ui_config = {'Choose one': '0 0',
@@ -31,6 +32,7 @@ browser_step_ui_config = {'Choose one': '0 0',
                           # 'Extract text and use as filter': '1 0',
                           'Goto site': '0 0',
                           'Goto URL': '0 1',
+                          'Make all child elements visible': '1 0',
                           'Press Enter': '0 0',
                           'Select by label': '1 1',
                           'Scroll down': '0 0',
@@ -38,6 +40,7 @@ browser_step_ui_config = {'Choose one': '0 0',
                           'Wait for seconds': '0 1',
                           'Wait for text': '0 1',
                           'Wait for text in element': '1 1',
+                          'Remove elements': '1 0',
                           # 'Press Page Down': '0 0',
                           # 'Press Page Up': '0 0',
                           # weird bug, come back to it later
@@ -52,6 +55,8 @@ class steppable_browser_interface():
     page = None
     start_url = None

+    action_timeout = 10 * 1000
+
     def __init__(self, start_url):
         self.start_url = start_url

@@ -102,7 +107,7 @@ class steppable_browser_interface():
             return
         elem = self.page.get_by_text(value)
         if elem.count():
-            elem.first.click(delay=randint(200, 500), timeout=3000)
+            elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)

     def action_click_element_containing_text_if_exists(self, selector=None, value=''):
         logger.debug("Clicking element containing text if exists")
@@ -111,7 +116,7 @@ class steppable_browser_interface():
         elem = self.page.get_by_text(value)
         logger.debug(f"Clicking element containing text - {elem.count()} elements found")
         if elem.count():
-            elem.first.click(delay=randint(200, 500), timeout=3000)
+            elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)
         else:
             return

@@ -119,7 +124,7 @@ class steppable_browser_interface():
         if not len(selector.strip()):
             return

-        self.page.fill(selector, value, timeout=10 * 1000)
+        self.page.fill(selector, value, timeout=self.action_timeout)

     def action_execute_js(self, selector, value):
         response = self.page.evaluate(value)
@@ -130,7 +135,7 @@ class steppable_browser_interface():
         if not len(selector.strip()):
             return

-        self.page.click(selector=selector, timeout=30 * 1000, delay=randint(200, 500))
+        self.page.click(selector=selector, timeout=self.action_timeout + 20 * 1000, delay=randint(200, 500))

     def action_click_element_if_exists(self, selector, value):
         import playwright._impl._errors as _api_types
@@ -138,7 +143,7 @@ class steppable_browser_interface():
         if not len(selector.strip()):
             return
         try:
-            self.page.click(selector, timeout=10 * 1000, delay=randint(200, 500))
+            self.page.click(selector, timeout=self.action_timeout, delay=randint(200, 500))
         except _api_types.TimeoutError as e:
             return
         except _api_types.Error as e:
@@ -185,11 +190,29 @@ class steppable_browser_interface():
         self.page.keyboard.press("PageDown", delay=randint(200, 500))

     def action_check_checkbox(self, selector, value):
-        self.page.locator(selector).check(timeout=1000)
+        self.page.locator(selector).check(timeout=self.action_timeout)

     def action_uncheck_checkbox(self, selector, value):
-        self.page.locator(selector, timeout=1000).uncheck(timeout=1000)
+        self.page.locator(selector).uncheck(timeout=self.action_timeout)

+    def action_remove_elements(self, selector, value):
+        """Removes all elements matching the given selector from the DOM."""
+        self.page.locator(selector).evaluate_all("els => els.forEach(el => el.remove())")
+
+    def action_make_all_child_elements_visible(self, selector, value):
+        """Recursively makes all child elements inside the given selector fully visible."""
+        self.page.locator(selector).locator("*").evaluate_all("""
+            els => els.forEach(el => {
+                el.style.display = 'block';              // Forces it to be displayed
+                el.style.visibility = 'visible';         // Ensures it's not hidden
+                el.style.opacity = '1';                  // Fully opaque
+                el.style.position = 'relative';          // Avoids 'absolute' hiding
+                el.style.height = 'auto';                // Expands collapsed elements
+                el.style.width = 'auto';                 // Ensures full visibility
+                el.removeAttribute('hidden');            // Removes hidden attribute
+                el.classList.remove('hidden', 'd-none'); // Removes common CSS hidden classes
+            })
+        """)

 # Responsible for maintaining a live 'context' with the chrome CDP
 # @todo - how long do contexts live for anyway?
@@ -257,6 +280,7 @@ class browsersteps_live_ui(steppable_browser_interface):
         logger.debug(f"Time to browser setup {time.time()-now:.2f}s")
         self.page.wait_for_timeout(1 * 1000)
+
     def mark_as_closed(self):
         logger.debug("Page closed, cleaning up..")

@@ -274,39 +298,30 @@ class browsersteps_live_ui(steppable_browser_interface):
         now = time.time()
         self.page.wait_for_timeout(1 * 1000)

-        # The actual screenshot
-        screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=40)
+        full_height = self.page.evaluate("document.documentElement.scrollHeight")
+
+        if full_height >= SCREENSHOT_SIZE_STITCH_THRESHOLD:
+            logger.warning(f"Page full Height: {full_height}px longer than {SCREENSHOT_SIZE_STITCH_THRESHOLD}px, using 'stitched screenshot method'.")
+            screenshot = capture_stitched_together_full_page(self.page)
+        else:
+            screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=40)
+
         logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s")

         now = time.time()
         self.page.evaluate("var include_filters=''")
         # Go find the interactive elements
         # @todo in the future, something smarter that can scan for elements with .click/focus etc event handlers?
         elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span'
         xpath_element_js = xpath_element_js.replace('%ELEMENTS%', elements)
         xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")
         # So the JS will find the smallest one first
         xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True)
-        logger.debug(f"Time to complete get_current_state of browser {time.time()-now:.2f}s")
-        # except
+        logger.debug(f"Time to scrape xpath element data in browser {time.time()-now:.2f}s")

         # playwright._impl._api_types.Error: Browser closed.
         # @todo show some countdown timer?
         return (screenshot, xpath_data)

     def request_visualselector_data(self):
         """
         Does the same that the playwright operation in content_fetcher does
         This is used to just bump the VisualSelector data so it' ready to go if they click on the tab
         @todo refactor and remove duplicate code, add include_filters
         :param xpath_data:
         :param screenshot:
         :param current_include_filters:
         :return:
         """
         import importlib.resources
         self.page.evaluate("var include_filters=''")
         xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
         from changedetectionio.content_fetchers import visualselector_xpath_selectors
         xpath_element_js = xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors)
         xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")
         screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))

         return (screenshot, xpath_data)
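Both new actions lean on Playwright's evaluate_all(), which runs a single JS callback over every matched node in one round trip. A hedged standalone sketch of the 'Remove elements' pattern (URL and selectors are examples, not from the changeset):

```python
from playwright.sync_api import sync_playwright

with sync_playwright() as p:
    browser = p.chromium.launch()
    page = browser.new_page()
    page.goto("https://example.com")  # example URL

    # One JS round trip removes every matching node, instead of
    # iterating element handles from Python one by one.
    page.locator(".advert, .cookie-banner").evaluate_all(
        "els => els.forEach(el => el.remove())"
    )

    print(page.content())
    browser.close()
```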
changedetectionio/content_fetchers/helpers.py (new file, 104 lines)

@@ -0,0 +1,104 @@

# Pages with a vertical height longer than this will use the 'stitch together' method.

# - Many GPUs have a max texture size of 16384x16384px (or lower on older devices).
# - If a page is taller than ~8000–10000px, it risks exceeding GPU memory limits.
# - This is especially important on headless Chromium, where Playwright may fail to allocate a massive full-page buffer.

# The size at which we will switch to stitching method
SCREENSHOT_SIZE_STITCH_THRESHOLD = 8000

from loguru import logger

def capture_stitched_together_full_page(page):
    import io
    import os
    import time
    from PIL import Image, ImageDraw, ImageFont

    MAX_TOTAL_HEIGHT = SCREENSHOT_SIZE_STITCH_THRESHOLD * 4  # Maximum total height for the final image (When in stitch mode)
    MAX_CHUNK_HEIGHT = 4000  # Height per screenshot chunk
    WARNING_TEXT_HEIGHT = 20  # Height of the warning text overlay

    # Save the original viewport size
    original_viewport = page.viewport_size
    now = time.time()

    try:
        viewport = page.viewport_size
        page_height = page.evaluate("document.documentElement.scrollHeight")

        # Limit the total capture height
        capture_height = min(page_height, MAX_TOTAL_HEIGHT)

        images = []
        total_captured_height = 0

        for offset in range(0, capture_height, MAX_CHUNK_HEIGHT):
            # Ensure we do not exceed the total height limit
            chunk_height = min(MAX_CHUNK_HEIGHT, MAX_TOTAL_HEIGHT - total_captured_height)

            # Adjust viewport size for this chunk
            page.set_viewport_size({"width": viewport["width"], "height": chunk_height})

            # Scroll to the correct position
            page.evaluate(f"window.scrollTo(0, {offset})")

            # Capture screenshot chunk
            screenshot_bytes = page.screenshot(type='jpeg', quality=int(os.getenv("SCREENSHOT_QUALITY", 30)))
            images.append(Image.open(io.BytesIO(screenshot_bytes)))

            total_captured_height += chunk_height

            # Stop if we reached the maximum total height
            if total_captured_height >= MAX_TOTAL_HEIGHT:
                break

        # Create the final stitched image
        stitched_image = Image.new('RGB', (viewport["width"], total_captured_height))
        y_offset = 0

        # Stitch the screenshot chunks together
        for img in images:
            stitched_image.paste(img, (0, y_offset))
            y_offset += img.height

        logger.debug(f"Screenshot stitched together in {time.time()-now:.2f}s")

        # Overlay warning text if the screenshot was trimmed
        if page_height > MAX_TOTAL_HEIGHT:
            draw = ImageDraw.Draw(stitched_image)
            warning_text = f"WARNING: Screenshot was {page_height}px but trimmed to {MAX_TOTAL_HEIGHT}px because it was too long"

            # Load font (default system font if Arial is unavailable)
            try:
                font = ImageFont.truetype("arial.ttf", WARNING_TEXT_HEIGHT)  # Arial (Windows/Mac)
            except IOError:
                font = ImageFont.load_default()  # Default font if Arial not found

            # Get text bounding box (correct method for newer Pillow versions)
            text_bbox = draw.textbbox((0, 0), warning_text, font=font)
            text_width = text_bbox[2] - text_bbox[0]  # Calculate text width
            text_height = text_bbox[3] - text_bbox[1]  # Calculate text height

            # Define background rectangle (top of the image)
            draw.rectangle([(0, 0), (viewport["width"], WARNING_TEXT_HEIGHT)], fill="white")

            # Center text horizontally within the warning area
            text_x = (viewport["width"] - text_width) // 2
            text_y = (WARNING_TEXT_HEIGHT - text_height) // 2

            # Draw the warning text in red
            draw.text((text_x, text_y), warning_text, fill="red", font=font)

        # Save or return the final image
        output = io.BytesIO()
        stitched_image.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", 30)))
        screenshot = output.getvalue()

    finally:
        # Restore the original viewport size
        page.set_viewport_size(original_viewport)

    return screenshot
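For context, this is how the fetchers in this changeset decide between the two capture paths; a minimal standalone sketch (the example URL and viewport are assumptions):

```python
from playwright.sync_api import sync_playwright

from changedetectionio.content_fetchers.helpers import (
    SCREENSHOT_SIZE_STITCH_THRESHOLD,
    capture_stitched_together_full_page,
)

with sync_playwright() as p:
    browser = p.chromium.launch()
    page = browser.new_page(viewport={"width": 1280, "height": 720})
    page.goto("https://example.com")  # example URL

    full_height = page.evaluate("document.documentElement.scrollHeight")
    if full_height >= SCREENSHOT_SIZE_STITCH_THRESHOLD:
        # Very tall pages are captured in viewport-sized chunks and stitched,
        # avoiding one huge GPU/JPEG buffer.
        screenshot = capture_stitched_together_full_page(page)
    else:
        screenshot = page.screenshot(type="jpeg", full_page=True, quality=40)

    browser.close()
```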
@@ -4,6 +4,7 @@ from urllib.parse import urlparse

 from loguru import logger

+from changedetectionio.content_fetchers.helpers import capture_stitched_together_full_page, SCREENSHOT_SIZE_STITCH_THRESHOLD
 from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
 from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable

@@ -89,6 +90,7 @@ class fetcher(Fetcher):
         from playwright.sync_api import sync_playwright
         import playwright._impl._errors
         from changedetectionio.content_fetchers import visualselector_xpath_selectors
+        import time
         self.delete_browser_steps_screenshots()
         response = None

@@ -179,6 +181,7 @@ class fetcher(Fetcher):

             self.page.wait_for_timeout(extra_wait * 1000)

+            now = time.time()
             # So we can find an element on the page where its selector was entered manually (maybe not xPath etc)
             if current_include_filters is not None:
                 self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
@@ -190,6 +193,8 @@ class fetcher(Fetcher):
             self.instock_data = self.page.evaluate("async () => {" + self.instock_data_js + "}")

             self.content = self.page.content()
+            logger.debug(f"Time to scrape xpath element data in browser {time.time() - now:.2f}s")

             # Bug 3 in Playwright screenshot handling
             # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
             # JPEG is better here because the screenshots can be very very large
@@ -199,10 +204,15 @@ class fetcher(Fetcher):
             # acceptable screenshot quality here
             try:
-                # The actual screenshot - this always base64 and needs decoding! horrible! huge CPU usage
-                self.screenshot = self.page.screenshot(type='jpeg',
-                                                       full_page=True,
-                                                       quality=int(os.getenv("SCREENSHOT_QUALITY", 72)),
-                                                       )
+                full_height = self.page.evaluate("document.documentElement.scrollHeight")
+
+                if full_height >= SCREENSHOT_SIZE_STITCH_THRESHOLD:
+                    logger.warning(
+                        f"Page full Height: {full_height}px longer than {SCREENSHOT_SIZE_STITCH_THRESHOLD}px, using 'stitched screenshot method'.")
+                    self.screenshot = capture_stitched_together_full_page(self.page)
+                else:
+                    self.screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("SCREENSHOT_QUALITY", 30)))
+
             except Exception as e:
                 # It's likely the screenshot was too long/big and something crashed
                 raise ScreenshotUnavailable(url=url, status_code=self.status_code)
@@ -29,8 +29,11 @@ function isItemInStock() {
         'currently unavailable',
         'dieser artikel ist bald wieder verfügbar',
+        'dostępne wkrótce',
         'en rupture',
         'en rupture de stock',
+        'épuisé',
         'esgotado',
+        'indisponible',
         'indisponível',
         'isn\'t in stock right now',
         'isnt in stock right now',
@@ -52,6 +55,8 @@ function isItemInStock() {
         'niet leverbaar',
         'niet op voorraad',
         'no disponible',
+        'non disponibile',
+        'non disponible',
         'no longer in stock',
         'no tickets available',
         'not available',
@@ -64,8 +69,10 @@ function isItemInStock() {
         'não estamos a aceitar encomendas',
         'out of stock',
         'out-of-stock',
+        'plus disponible',
         'prodotto esaurito',
+        'produkt niedostępny',
         'rupture',
         'sold out',
         'sold-out',
         'stokta yok',
@@ -41,7 +41,7 @@ const findUpTag = (el) => {

     // Strategy 1: If it's an input, with name, and there's only one, prefer that
     if (el.name !== undefined && el.name.length) {
-        var proposed = el.tagName + "[name=" + el.name + "]";
+        var proposed = el.tagName + "[name=\"" + CSS.escape(el.name) + "\"]";
         var proposed_element = window.document.querySelectorAll(proposed);
         if (proposed_element.length) {
             if (proposed_element.length === 1) {
@@ -102,13 +102,15 @@ function collectVisibleElements(parent, visibleElements) {
     const children = parent.children;
     for (let i = 0; i < children.length; i++) {
         const child = children[i];
+        const computedStyle = window.getComputedStyle(child);

         if (
             child.nodeType === Node.ELEMENT_NODE &&
-            window.getComputedStyle(child).display !== 'none' &&
-            window.getComputedStyle(child).visibility !== 'hidden' &&
+            computedStyle.display !== 'none' &&
+            computedStyle.visibility !== 'hidden' &&
             child.offsetWidth >= 0 &&
             child.offsetHeight >= 0 &&
-            window.getComputedStyle(child).contentVisibility !== 'hidden'
+            computedStyle.contentVisibility !== 'hidden'
         ) {
             // If the child is an element and is visible, recursively collect visible elements
             collectVisibleElements(child, visibleElements);
@@ -173,6 +175,7 @@ visibleElementsArray.forEach(function (element) {

     // Try to identify any possible currency amounts "Sale: 4000" or "Sale now 3000 Kc", can help with the training.
     const hasDigitCurrency = (/\d/.test(text.slice(0, 6)) || /\d/.test(text.slice(-6)) ) && /([€£$¥₩₹]|USD|AUD|EUR|Kč|kr|SEK|,–)/.test(text) ;
+    const computedStyle = window.getComputedStyle(element);

     size_pos.push({
         xpath: xpath_result,
@@ -184,10 +187,10 @@ visibleElementsArray.forEach(function (element) {
         tagName: (element.tagName) ? element.tagName.toLowerCase() : '',
         // tagtype used by Browser Steps
         tagtype: (element.tagName.toLowerCase() === 'input' && element.type) ? element.type.toLowerCase() : '',
-        isClickable: window.getComputedStyle(element).cursor === "pointer",
+        isClickable: computedStyle.cursor === "pointer",
         // Used by the keras trainer
-        fontSize: window.getComputedStyle(element).getPropertyValue('font-size'),
-        fontWeight: window.getComputedStyle(element).getPropertyValue('font-weight'),
+        fontSize: computedStyle.getPropertyValue('font-size'),
+        fontWeight: computedStyle.getPropertyValue('font-weight'),
         hasDigitCurrency: hasDigitCurrency,
         label: label,
     });
@@ -1,6 +1,9 @@
 import difflib
 from typing import List, Iterator, Union

+REMOVED_STYLE = "background-color: #fadad7; color: #b30000;"
+ADDED_STYLE = "background-color: #eaf2c2; color: #406619;"
+
 def same_slicer(lst: List[str], start: int, end: int) -> List[str]:
     """Return a slice of the list, or a single element if start == end."""
     return lst[start:end] if start != end else [lst[start]]
@@ -12,11 +15,12 @@ def customSequenceMatcher(
     include_removed: bool = True,
     include_added: bool = True,
     include_replaced: bool = True,
-    include_change_type_prefix: bool = True
+    include_change_type_prefix: bool = True,
+    html_colour: bool = False
 ) -> Iterator[List[str]]:
     """
     Compare two sequences and yield differences based on specified parameters.

     Args:
         before (List[str]): Original sequence
         after (List[str]): Modified sequence
@@ -25,26 +29,35 @@ def customSequenceMatcher(
         include_added (bool): Include added parts
         include_replaced (bool): Include replaced parts
         include_change_type_prefix (bool): Add prefixes to indicate change types
+        html_colour (bool): Use HTML background colors for differences

     Yields:
         List[str]: Differences between sequences
     """
     cruncher = difflib.SequenceMatcher(isjunk=lambda x: x in " \t", a=before, b=after)

     for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
         if include_equal and tag == 'equal':
             yield before[alo:ahi]
         elif include_removed and tag == 'delete':
-            prefix = "(removed) " if include_change_type_prefix else ''
-            yield [f"{prefix}{line}" for line in same_slicer(before, alo, ahi)]
+            if html_colour:
+                yield [f'<span style="{REMOVED_STYLE}">{line}</span>' for line in same_slicer(before, alo, ahi)]
+            else:
+                yield [f"(removed) {line}" for line in same_slicer(before, alo, ahi)] if include_change_type_prefix else same_slicer(before, alo, ahi)
         elif include_replaced and tag == 'replace':
-            prefix_changed = "(changed) " if include_change_type_prefix else ''
-            prefix_into = "(into) " if include_change_type_prefix else ''
-            yield [f"{prefix_changed}{line}" for line in same_slicer(before, alo, ahi)] + \
-                  [f"{prefix_into}{line}" for line in same_slicer(after, blo, bhi)]
+            if html_colour:
+                yield [f'<span style="{REMOVED_STYLE}">{line}</span>' for line in same_slicer(before, alo, ahi)] + \
+                      [f'<span style="{ADDED_STYLE}">{line}</span>' for line in same_slicer(after, blo, bhi)]
+            else:
+                yield [f"(changed) {line}" for line in same_slicer(before, alo, ahi)] + \
+                      [f"(into) {line}" for line in same_slicer(after, blo, bhi)] if include_change_type_prefix else same_slicer(before, alo, ahi) + same_slicer(after, blo, bhi)
         elif include_added and tag == 'insert':
-            prefix = "(added) " if include_change_type_prefix else ''
-            yield [f"{prefix}{line}" for line in same_slicer(after, blo, bhi)]
+            if html_colour:
+                yield [f'<span style="{ADDED_STYLE}">{line}</span>' for line in same_slicer(after, blo, bhi)]
+            else:
+                yield [f"(added) {line}" for line in same_slicer(after, blo, bhi)] if include_change_type_prefix else same_slicer(after, blo, bhi)

 def render_diff(
     previous_version_file_contents: str,
@@ -55,11 +68,12 @@ def render_diff(
     include_replaced: bool = True,
     line_feed_sep: str = "\n",
     include_change_type_prefix: bool = True,
-    patch_format: bool = False
+    patch_format: bool = False,
+    html_colour: bool = False
 ) -> str:
     """
     Render the difference between two file contents.

     Args:
         previous_version_file_contents (str): Original file contents
         newest_version_file_contents (str): Modified file contents
@@ -70,7 +84,8 @@ def render_diff(
         line_feed_sep (str): Separator for lines in output
         include_change_type_prefix (bool): Add prefixes to indicate change types
         patch_format (bool): Use patch format for output
+        html_colour (bool): Use HTML background colors for differences

     Returns:
         str: Rendered difference
     """
@@ -88,10 +103,11 @@ def render_diff(
         include_removed=include_removed,
         include_added=include_added,
         include_replaced=include_replaced,
-        include_change_type_prefix=include_change_type_prefix
+        include_change_type_prefix=include_change_type_prefix,
+        html_colour=html_colour
     )

     def flatten(lst: List[Union[str, List[str]]]) -> str:
         return line_feed_sep.join(flatten(x) if isinstance(x, list) else x for x in lst)

     return flatten(rendered_diff)
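A small usage sketch of the new html_colour mode (parameter names are taken from the diff above; the changedetectionio.diff module path is assumed):

```python
from changedetectionio.diff import render_diff

before = "price: 10\nin stock"
after = "price: 12\nin stock"

html = render_diff(
    previous_version_file_contents=before,
    newest_version_file_contents=after,
    line_feed_sep="<br>",
    html_colour=True,
)

# The changed line is emitted twice, wrapped in the REMOVED_STYLE and
# ADDED_STYLE spans, e.g.
# <span style="background-color: #fadad7; color: #b30000;">price: 10</span>...
print(html)
```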
@@ -598,17 +598,31 @@ def changedetection_app(config=None, datastore_o=None):

         if 'notification_title' in request.form and request.form['notification_title'].strip():
             n_object['notification_title'] = request.form.get('notification_title', '').strip()
+        elif datastore.data['settings']['application'].get('notification_title'):
+            n_object['notification_title'] = datastore.data['settings']['application'].get('notification_title')
         else:
             n_object['notification_title'] = "Test title"

         if 'notification_body' in request.form and request.form['notification_body'].strip():
             n_object['notification_body'] = request.form.get('notification_body', '').strip()
+        elif datastore.data['settings']['application'].get('notification_body'):
+            n_object['notification_body'] = datastore.data['settings']['application'].get('notification_body')
         else:
             n_object['notification_body'] = "Test body"

+        n_object['as_async'] = False
         n_object.update(watch.extra_notification_token_values())
-        from .notification import process_notification
-        sent_obj = process_notification(n_object, datastore)
+        from . import update_worker
+        new_worker = update_worker.update_worker(update_q, notification_q, app, datastore)
+        new_worker.queue_notification_for_watch(notification_q=notification_q, n_object=n_object, watch=watch)
     except Exception as e:
-        return make_response(f"Error: str(e)", 400)
+        e_str = str(e)
+        # Remove this text which is not important and floods the container
+        e_str = e_str.replace(
+            "DEBUG - <class 'apprise.decorators.base.CustomNotifyPlugin.instantiate_plugin.<locals>.CustomNotifyPluginWrapper'>",
+            '')
+
+        return make_response(e_str, 400)

     return 'OK - Sent test notifications'

@@ -841,7 +855,7 @@ def changedetection_app(config=None, datastore_o=None):
             if request.args.get("next") and request.args.get("next") == 'diff':
                 return redirect(url_for('diff_history_page', uuid=uuid))

-            return redirect(url_for('index'))
+            return redirect(url_for('index', tag=request.args.get("tag",'')))

         else:
             if request.method == 'POST' and not form.validate():

@@ -861,14 +875,14 @@ def changedetection_app(config=None, datastore_o=None):

         system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'

-        is_html_webdriver = False
+        watch_uses_webdriver = False
         if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
-            is_html_webdriver = True
+            watch_uses_webdriver = True

         from zoneinfo import available_timezones

         # Only works reliably with Playwright
         visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and is_html_webdriver

         template_args = {
             'available_processors': processors.available_processors(),
             'available_timezones': sorted(available_timezones()),
@@ -881,14 +895,13 @@ def changedetection_app(config=None, datastore_o=None):
             'has_default_notification_urls': True if len(datastore.data['settings']['application']['notification_urls']) else False,
             'has_extra_headers_file': len(datastore.get_all_headers_in_textfile_for_watch(uuid=uuid)) > 0,
             'has_special_tag_options': _watch_has_tag_options_set(watch=watch),
-            'is_html_webdriver': is_html_webdriver,
+            'watch_uses_webdriver': watch_uses_webdriver,
             'jq_support': jq_support,
             'playwright_enabled': os.getenv('PLAYWRIGHT_DRIVER_URL', False),
             'settings_application': datastore.data['settings']['application'],
             'timezone_default_config': datastore.data['settings']['application'].get('timezone'),
             'using_global_webdriver_wait': not default['webdriver_delay'],
             'uuid': uuid,
             'visualselector_enabled': visualselector_enabled,
             'watch': watch
         }

@@ -1395,13 +1408,13 @@ def changedetection_app(config=None, datastore_o=None):
         if new_uuid:
             if add_paused:
                 flash('Watch added in Paused state, saving will unpause.')
-                return redirect(url_for('edit_page', uuid=new_uuid, unpause_on_save=1))
+                return redirect(url_for('edit_page', uuid=new_uuid, unpause_on_save=1, tag=request.args.get('tag')))
             else:
                 # Straight into the queue.
                 update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
                 flash("Watch added.")

-            return redirect(url_for('index'))
+            return redirect(url_for('index', tag=request.args.get('tag','')))
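The redirect changes thread the active tag filter through url_for() so the user lands back on the filtered watch list; Flask appends unknown url_for keyword arguments as query parameters. An illustrative sketch (routes are simplified stand-ins):

```python
from flask import Flask, redirect, request, url_for

app = Flask(__name__)

@app.route("/")
def index():
    return f"tag filter: {request.args.get('tag', '(none)')}"

@app.route("/form", methods=["POST"])
def form():
    # tag=... becomes /?tag=shopping, keeping the user's filter active
    return redirect(url_for('index', tag=request.args.get('tag', '')))
```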
@@ -171,7 +171,7 @@ class validateTimeZoneName(object):

 class ScheduleLimitDaySubForm(Form):
     enabled = BooleanField("not set", default=True)
-    start_time = TimeStringField("Start At", default="00:00", render_kw={"placeholder": "HH:MM"}, validators=[validators.Optional()])
+    start_time = TimeStringField("Start At", default="00:00", validators=[validators.Optional()])
     duration = FormField(TimeDurationForm, label="Run duration")

 class ScheduleLimitForm(Form):

@@ -308,8 +308,12 @@ class ValidateAppRiseServers(object):
         # so that the custom endpoints are registered
         from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper
         for server_url in field.data:
-            if not apobj.add(server_url):
-                message = field.gettext('\'%s\' is not a valid AppRise URL.' % (server_url))
+            url = server_url.strip()
+            if url.startswith("#"):
+                continue
+
+            if not apobj.add(url):
+                message = field.gettext('\'%s\' is not a valid AppRise URL.' % (url))
                 raise ValidationError(message)

 class ValidateJinja2Template(object):
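The validator now lets users disable individual notification URLs by commenting them out with a leading '#'. A tiny sketch of the rule (example URLs):

```python
urls = [
    "posts://example.com/notify",
    "# gets://disabled.example.com/old-endpoint",
    "  # also ignored after stripping",
]

active = []
for server_url in urls:
    url = server_url.strip()
    if url.startswith("#"):
        continue  # commented out, skip validation entirely
    active.append(url)

print(active)  # ['posts://example.com/notify']
```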
@@ -1,5 +1,6 @@
-from typing import List
+from loguru import logger
 from lxml import etree
 from typing import List
 import json
 import re

@@ -298,8 +299,10 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
     # https://github.com/dgtlmoon/changedetection.io/pull/2041#issuecomment-1848397161w
     # Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded within HTML tags
     try:
         stripped_text_from_html = _parse_json(json.loads(content), json_filter)
     except json.JSONDecodeError:
         # .lstrip("\ufeff") strings ByteOrderMark from UTF8 and still lets the UTF work
         stripped_text_from_html = _parse_json(json.loads(content.lstrip("\ufeff") ), json_filter)
+    except json.JSONDecodeError as e:
+        logger.warning(str(e))

         # Foreach <script json></script> blob.. just return the first that matches json_filter
         # As a last resort, try to parse the whole <body>
@@ -69,7 +69,7 @@ def parse_headers_from_text_file(filepath):
         for l in f.readlines():
             l = l.strip()
             if not l.startswith('#') and ':' in l:
-                (k, v) = l.split(':')
+                (k, v) = l.split(':', 1)  # Split only on the first colon
                 headers[k.strip()] = v.strip()

     return headers
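Why the maxsplit change matters: header values can themselves contain colons, and the old single-argument split() exploded such lines into too many fields. For example:

```python
line = "Referer: https://example.com:8080/page"

# Old behaviour: four fields, so unpacking into (k, v) raised
# "ValueError: too many values to unpack"
print(len(line.split(':')))  # 4

# New behaviour: split only on the first colon, value keeps its colons
k, v = line.split(':', 1)
print(k.strip(), '->', v.strip())  # Referer -> https://example.com:8080/page
```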
@@ -247,37 +247,32 @@ class model(watch_base):
         bump = self.history
         return self.__newest_history_key

-    # Given an arbitrary timestamp, find the closest next key
-    # For example, last_viewed = 1000 so it should return the next 1001 timestamp
-    #
-    # used for the [diff] button so it can preset a smarter from_version
+    # Given an arbitrary timestamp, find the best history key for the [diff] button so it can preset a smarter from_version
     @property
-    def get_next_snapshot_key_to_last_viewed(self):
+    def get_from_version_based_on_last_viewed(self):
         """Unfortunately for now timestamp is stored as string key"""
         keys = list(self.history.keys())
         if not keys:
             return None
+        if len(keys) == 1:
+            return keys[0]

         last_viewed = int(self.get('last_viewed'))
-        prev_k = keys[0]
         sorted_keys = sorted(keys, key=lambda x: int(x))
         sorted_keys.reverse()

-        # When the 'last viewed' timestamp is greater than the newest snapshot, return second last
-        if last_viewed > int(sorted_keys[0]):
+        # When the 'last viewed' timestamp is greater than or equal the newest snapshot, return second newest
+        if last_viewed >= int(sorted_keys[0]):
             return sorted_keys[1]

+        # When the 'last viewed' timestamp is between snapshots, return the older snapshot
+        for newer, older in list(zip(sorted_keys[0:], sorted_keys[1:])):
+            if last_viewed < int(newer) and last_viewed >= int(older):
+                return older

-        for k in sorted_keys:
-            if int(k) < last_viewed:
-                if prev_k == sorted_keys[0]:
-                    # Return the second last one so we dont recommend the same version compares itself
-                    return sorted_keys[1]
-                return prev_k
-            prev_k = k

-        return keys[0]
+        # When the 'last viewed' timestamp is less than the oldest snapshot, return oldest
+        return sorted_keys[-1]

     def get_history_snapshot(self, timestamp):
         import brotli
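The rewritten property picks a sensible 'from' version for the [diff] button. A standalone sketch of the same selection logic, with example timestamps:

```python
def get_from_version(history_keys, last_viewed):
    """Pick a smarter from_version: history keys are stringified timestamps."""
    keys = list(history_keys)
    if not keys:
        return None
    if len(keys) == 1:
        return keys[0]

    sorted_keys = sorted(keys, key=int, reverse=True)

    # Already seen the newest? Offer the second newest so the diff isn't empty
    if last_viewed >= int(sorted_keys[0]):
        return sorted_keys[1]

    # Between two snapshots: pick the older of the pair
    for newer, older in zip(sorted_keys, sorted_keys[1:]):
        if int(older) <= last_viewed < int(newer):
            return older

    # Older than everything: start from the oldest snapshot
    return sorted_keys[-1]

snapshots = ["1000", "2000", "3000"]
assert get_from_version(snapshots, 3500) == "2000"  # viewed after newest
assert get_from_version(snapshots, 2500) == "2000"  # viewed between 2000 and 3000
assert get_from_version(snapshots, 500) == "1000"   # viewed before oldest
```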
@@ -304,34 +299,17 @@ class model(watch_base):
     # Save some text file to the appropriate path and bump the history
     # result_obj from fetch_site_status.run()
     def save_history_text(self, contents, timestamp, snapshot_id):
-        import brotli
+        from changedetectionio.store import ChangeDetectionStore
+        from changedetectionio.storage.filesystem_storage import FileSystemStorage

         logger.trace(f"{self.get('uuid')} - Updating history.txt with timestamp {timestamp}")

-        self.ensure_data_dir_exists()
-
-        threshold = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024))
-        skip_brotli = strtobool(os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False'))
-
-        if not skip_brotli and len(contents) > threshold:
-            snapshot_fname = f"{snapshot_id}.txt.br"
-            dest = os.path.join(self.watch_data_dir, snapshot_fname)
-            if not os.path.exists(dest):
-                with open(dest, 'wb') as f:
-                    f.write(brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT))
-        else:
-            snapshot_fname = f"{snapshot_id}.txt"
-            dest = os.path.join(self.watch_data_dir, snapshot_fname)
-            if not os.path.exists(dest):
-                with open(dest, 'wb') as f:
-                    f.write(contents.encode('utf-8'))
-
-        # Append to index
-        # @todo check last char was \n
-        index_fname = os.path.join(self.watch_data_dir, "history.txt")
-        with open(index_fname, 'a') as f:
-            f.write("{},{}\n".format(timestamp, snapshot_fname))
-            f.close()
+        # Get storage from singleton store or create a filesystem storage as default
+        store = ChangeDetectionStore.instance if hasattr(ChangeDetectionStore, 'instance') else None
+        storage = store.storage if store and hasattr(store, 'storage') else FileSystemStorage(self.__datastore_path)
+
+        # Use the storage backend to save the history text
+        snapshot_fname = storage.save_history_text(self.get('uuid'), contents, timestamp, snapshot_id)

         self.__newest_history_key = timestamp
         self.__history_n += 1
@@ -357,7 +335,7 @@ class model(watch_base):
     # Iterate over all history texts and see if something new exists
     # Always applying .strip() to start/end but optionally replace any other whitespace
     def lines_contain_something_unique_compared_to_history(self, lines: list, ignore_whitespace=False):
-        local_lines = []
+        local_lines = set([])
         if lines:
             if ignore_whitespace:
                 if isinstance(lines[0], str):  # Can be either str or bytes depending on what was on the disk
@@ -532,7 +510,7 @@ class model(watch_base):
     def save_error_text(self, contents):
         self.ensure_data_dir_exists()
         target_path = os.path.join(self.watch_data_dir, "last-error.txt")
-        with open(target_path, 'w') as f:
+        with open(target_path, 'w', encoding='utf-8') as f:
            f.write(contents)

     def save_xpath_data(self, data, as_error=False):
@@ -23,7 +23,7 @@ valid_tokens = {
 }

 default_notification_format_for_watch = 'System default'
-default_notification_format = 'Text'
+default_notification_format = 'HTML Color'
 default_notification_body = '{{watch_url}} had a change.\n---\n{{diff}}\n---\n'
 default_notification_title = 'ChangeDetection.io Notification - {{watch_url}}'
@@ -31,6 +31,7 @@ valid_notification_formats = {
     'Text': NotifyFormat.TEXT,
     'Markdown': NotifyFormat.MARKDOWN,
     'HTML': NotifyFormat.HTML,
+    'HTML Color': 'htmlcolor',
     # Used only for editing a watch (not for global)
     default_notification_format_for_watch: default_notification_format_for_watch
 }
@@ -66,6 +67,10 @@ def process_notification(n_object, datastore):

     sent_objs = []
     from .apprise_asset import asset
+
+    if 'as_async' in n_object:
+        asset.async_mode = n_object.get('as_async')
+
     apobj = apprise.Apprise(debug=True, asset=asset)

     if not n_object.get('notification_urls'):
@@ -76,9 +81,16 @@ def process_notification(n_object, datastore):

         # Get the notification body from datastore
         n_body = jinja_render(template_str=n_object.get('notification_body', ''), **notification_parameters)
+        if n_object.get('notification_format', '').startswith('HTML'):
+            n_body = n_body.replace("\n", '<br>')
+
         n_title = jinja_render(template_str=n_object.get('notification_title', ''), **notification_parameters)

         url = url.strip()
+        if url.startswith('#'):
+            logger.trace(f"Skipping commented out notification URL - {url}")
+            continue
+
         if not url:
             logger.warning(f"Process Notification: skipping empty notification URL.")
             continue
@@ -149,8 +161,6 @@ def process_notification(n_object, datastore):
                 attach=n_object.get('screenshot', None)
             )

-            # Give apprise time to register an error
-            time.sleep(3)
-
             # Returns empty string if nothing found, multi-line string otherwise
             log_value = logs.getvalue()
@@ -33,8 +33,8 @@ class difference_detection_processor():

         url = self.watch.link

-        # Protect against file://, file:/ access, check the real "link" without any meta "source:" etc prepended.
-        if re.search(r'^file:/', url.strip(), re.IGNORECASE):
+        # Protect against file:, file:/, file:// access, check the real "link" without any meta "source:" etc prepended.
+        if re.search(r'^file:', url.strip(), re.IGNORECASE):
             if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
                 raise Exception(
                     "file:// type access is denied for security reasons."
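The widened pattern matters because `file:` URIs are valid with zero, one, or two slashes, and `^file:/` missed the bare single-colon form. A quick standalone check mirroring the regex change (example URLs are made up):

```python
import re

urls = ["file:///etc/passwd", "file:/etc/passwd", "file:etc/passwd", "https://example.com"]

old = [u for u in urls if re.search(r'^file:/', u, re.IGNORECASE)]
new = [u for u in urls if re.search(r'^file:', u, re.IGNORECASE)]

assert "file:etc/passwd" not in old  # slipped through the old check
assert "file:etc/passwd" in new      # caught by the widened check
assert "https://example.com" not in new
```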
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="UTF-8" standalone="no"?>
 <svg
    version="1.1"
-   id="Layer_1"
+   id="copy"
    x="0px"
    y="0px"
    viewBox="0 0 115.77 122.88"

Before: 2.5 KiB | After: 2.5 KiB
@@ -6,7 +6,7 @@
    height="7.5005589"
    width="11.248507"
    version="1.1"
-   id="Layer_1"
+   id="email"
    viewBox="0 0 7.1975545 4.7993639"
    xml:space="preserve"
    xmlns="http://www.w3.org/2000/svg"

Before: 1.9 KiB | After: 1.9 KiB
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="UTF-8" standalone="no"?>
 <svg
    version="1.1"
-   id="Layer_1"
+   id="schedule"
    x="0px"
    y="0px"
    viewBox="0 0 661.20001 665.40002"

Before: 5.9 KiB | After: 5.9 KiB
@@ -221,7 +221,7 @@ $(document).ready(function () {
                     // If you switch to "Click X,y" after an element here is setup, it will give the last co-ords anyway
                     //if (x['isClickable'] || x['tagName'].startsWith('h') || x['tagName'] === 'a' || x['tagName'] === 'button' || x['tagtype'] === 'submit' || x['tagtype'] === 'checkbox' || x['tagtype'] === 'radio' || x['tagtype'] === 'li') {
                     $('select', first_available).val('Click element').change();
-                    $('input[type=text]', first_available).first().val(x['xpath']);
+                    $('input[type=text]', first_available).first().val(x['xpath']).focus();
                     found_something = true;
                     //}
                 }
@@ -305,7 +305,7 @@ $(document).ready(function () {

         if ($(this).val() === 'Click X,Y' && last_click_xy['x'] > 0 && $(elem_value).val().length === 0) {
             // @todo handle scale
-            $(elem_value).val(last_click_xy['x'] + ',' + last_click_xy['y']);
+            $(elem_value).val(last_click_xy['x'] + ',' + last_click_xy['y']).focus();
         }
     }).change();
@@ -1,42 +1,52 @@
-$(document).ready(function() {
-  $('#add-email-helper').click(function (e) {
-    e.preventDefault();
-    email = prompt("Destination email");
-    if(email) {
-      var n = $(".notification-urls");
-      var p=email_notification_prefix;
-      $(n).val( $.trim( $(n).val() )+"\n"+email_notification_prefix+email );
-    }
-  });
-
-  $('#send-test-notification').click(function (e) {
-    e.preventDefault();
-
-    data = {
-      notification_body: $('#notification_body').val(),
-      notification_format: $('#notification_format').val(),
-      notification_title: $('#notification_title').val(),
-      notification_urls: $('.notification-urls').val(),
-      tags: $('#tags').val(),
-      window_url: window.location.href,
-    }
-
-
-    $.ajax({
-      type: "POST",
-      url: notification_base_url,
-      data : data,
-      statusCode: {
-        400: function(data) {
-          // More than likely the CSRF token was lost when the server restarted
-          alert(data.responseText);
-        }
-      }
-    }).done(function(data){
-      console.log(data);
-      alert(data);
-    })
-  });
-});
+$(document).ready(function () {
+
+    $('#add-email-helper').click(function (e) {
+        e.preventDefault();
+        email = prompt("Destination email");
+        if (email) {
+            var n = $(".notification-urls");
+            var p = email_notification_prefix;
+            $(n).val($.trim($(n).val()) + "\n" + email_notification_prefix + email);
+        }
+    });
+
+    $('#send-test-notification').click(function (e) {
+        e.preventDefault();
+
+        data = {
+            notification_body: $('#notification_body').val(),
+            notification_format: $('#notification_format').val(),
+            notification_title: $('#notification_title').val(),
+            notification_urls: $('.notification-urls').val(),
+            tags: $('#tags').val(),
+            window_url: window.location.href,
+        }
+
+        $('.notifications-wrapper .spinner').fadeIn();
+        $('#notification-test-log').show();
+        $.ajax({
+            type: "POST",
+            url: notification_base_url,
+            data: data,
+            statusCode: {
+                400: function (data) {
+                    $("#notification-test-log>span").text(data.responseText);
+                },
+            }
+        }).done(function (data) {
+            $("#notification-test-log>span").text(data);
+        }).fail(function (jqXHR, textStatus, errorThrown) {
+            // Handle connection refused or other errors
+            if (textStatus === "error" && errorThrown === "") {
+                console.error("Connection refused or server unreachable");
+                $("#notification-test-log>span").text("Error: Connection refused or server is unreachable.");
+            } else {
+                console.error("Error:", textStatus, errorThrown);
+                $("#notification-test-log>span").text("An error occurred: " + textStatus);
+            }
+        }).always(function () {
+            $('.notifications-wrapper .spinner').hide();
+        })
+    });
+});
@@ -40,19 +40,22 @@
     }
   }

+@media only screen and (min-width: 760px) {
+
-#browser-steps .flex-wrapper {
-  display: flex;
-  flex-flow: row;
-  height: 70vh;
-  font-size: 80%;
-  #browser-steps-ui {
-    flex-grow: 1; /* Allow it to grow and fill the available space */
-    flex-shrink: 1; /* Allow it to shrink if needed */
-    flex-basis: 0; /* Start with 0 base width so it stretches as much as possible */
-    background-color: #eee;
-    border-radius: 5px;
+  #browser-steps .flex-wrapper {
+    display: flex;
+    flex-flow: row;
+    height: 70vh;
+    font-size: 80%;
+
+    #browser-steps-ui {
+      flex-grow: 1; /* Allow it to grow and fill the available space */
+      flex-shrink: 1; /* Allow it to shrink if needed */
+      flex-basis: 0; /* Start with 0 base width so it stretches as much as possible */
+      background-color: #eee;
+      border-radius: 5px;

   }
 }

 #browser-steps-fieldlist {
@@ -63,15 +66,21 @@
   padding-left: 1rem;
   overflow-y: scroll;
 }
+
+  /* this is duplicate :( */
+  #browsersteps-selector-wrapper {
+    height: 100% !important;
+  }
+}

 /* this is duplicate :( */
 #browsersteps-selector-wrapper {
   height: 100%;

   width: 100%;
   overflow-y: scroll;
   position: relative;
   //width: 100%;
   height: 80vh;

   > img {
     position: absolute;
     max-width: 100%;
@@ -91,7 +100,6 @@
   left: 50%;
   top: 50%;
   transform: translate(-50%, -50%);
-  margin-left: -40px;
   z-index: 100;
   max-width: 350px;
   text-align: center;
@@ -380,7 +380,15 @@ a.pure-button-selected {
 }

 .notifications-wrapper {
-  padding: 0.5rem 0 1rem 0;
+  padding-top: 0.5rem;
+  #notification-test-log {
+    padding-top: 1rem;
+    white-space: pre-wrap;
+    word-break: break-word;
+    overflow-wrap: break-word;
+    max-width: 100%;
+    box-sizing: border-box;
+  }
 }

 label {
@@ -46,21 +46,22 @@
 #browser_steps li > label {
   display: none; }

-#browser-steps .flex-wrapper {
-  display: flex;
-  flex-flow: row;
-  height: 70vh;
-  font-size: 80%; }
-  #browser-steps .flex-wrapper #browser-steps-ui {
-    flex-grow: 1;
-    /* Allow it to grow and fill the available space */
-    flex-shrink: 1;
-    /* Allow it to shrink if needed */
-    flex-basis: 0;
-    /* Start with 0 base width so it stretches as much as possible */
-    background-color: #eee;
-    border-radius: 5px; }
-  #browser-steps .flex-wrapper #browser-steps-fieldlist {
+@media only screen and (min-width: 760px) {
+  #browser-steps .flex-wrapper {
+    display: flex;
+    flex-flow: row;
+    height: 70vh;
+    font-size: 80%; }
+    #browser-steps .flex-wrapper #browser-steps-ui {
+      flex-grow: 1;
+      /* Allow it to grow and fill the available space */
+      flex-shrink: 1;
+      /* Allow it to shrink if needed */
+      flex-basis: 0;
+      /* Start with 0 base width so it stretches as much as possible */
+      background-color: #eee;
+      border-radius: 5px; }
+  #browser-steps-fieldlist {
     flex-grow: 0;
     /* Don't allow it to grow */
     flex-shrink: 0;
@@ -71,13 +72,16 @@
     /* Set a max width to prevent overflow */
     padding-left: 1rem;
     overflow-y: scroll; }
+  /* this is duplicate :( */
+  #browsersteps-selector-wrapper {
+    height: 100% !important; } }

 /* this is duplicate :( */
 #browsersteps-selector-wrapper {
   height: 100%;
   width: 100%;
   overflow-y: scroll;
   position: relative;
   height: 80vh;
   /* nice tall skinny one */ }
   #browsersteps-selector-wrapper > img {
     position: absolute;
@@ -92,7 +96,6 @@
   left: 50%;
   top: 50%;
   transform: translate(-50%, -50%);
-  margin-left: -40px;
   z-index: 100;
   max-width: 350px;
   text-align: center; }
@@ -780,7 +783,14 @@ a.pure-button-selected {
   cursor: pointer; }

 .notifications-wrapper {
-  padding: 0.5rem 0 1rem 0; }
+  padding-top: 0.5rem; }
+  .notifications-wrapper #notification-test-log {
+    padding-top: 1rem;
+    white-space: pre-wrap;
+    word-break: break-word;
+    overflow-wrap: break-word;
+    max-width: 100%;
+    box-sizing: border-box; }

 label:hover {
   cursor: pointer; }
61
changedetectionio/storage/README.md
Normal file
@@ -0,0 +1,61 @@
# Storage Backends for changedetection.io

This module provides different storage backends for changedetection.io, allowing you to store data in various systems:

- **FileSystemStorage**: The default storage backend that stores data on the local filesystem.
- **MongoDBStorage**: Stores data in a MongoDB database.
- **S3Storage**: Stores data in an Amazon S3 bucket.

## Usage

The storage backend is automatically selected based on the datastore path provided when initializing the application:

- For filesystem storage (default): `/datastore`
- For MongoDB storage: `mongodb://username:password@host:port/database`
- For S3 storage: `s3://bucket-name/optional-prefix`
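A factory routine can make this selection by inspecting the URI scheme. A minimal sketch of that idea (the real dispatch lives in `storage_factory.py`, which is not shown here, so the function name and signature below are assumptions):

```python
from urllib.parse import urlparse

def create_storage(datastore_path, **kwargs):
    # Hypothetical dispatcher: pick a backend from the datastore path's scheme
    scheme = urlparse(datastore_path).scheme
    if scheme == 'mongodb':
        from .mongodb_storage import MongoDBStorage
        return MongoDBStorage(datastore_path, **kwargs)
    if scheme == 's3':
        from .s3_storage import S3Storage
        return S3Storage(datastore_path, **kwargs)
    # Plain directory paths have no scheme, so fall back to the filesystem backend
    from .filesystem_storage import FileSystemStorage
    return FileSystemStorage(datastore_path, **kwargs)
```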
## Configuration

### Filesystem Storage

The default storage backend. Simply specify a directory path:

```
changedetection.io -d /path/to/datastore
```

### MongoDB Storage

To use MongoDB storage, specify a MongoDB connection URI:

```
changedetection.io -d mongodb://username:password@host:port/database
```

Make sure to install the required dependencies:

```
pip install -r requirements-storage.txt
```

### Amazon S3 Storage

To use S3 storage, specify an S3 URI:

```
changedetection.io -d s3://bucket-name/optional-prefix
```

Make sure to:

1. Install the required dependencies: `pip install -r requirements-storage.txt`
2. Configure AWS credentials using environment variables or IAM roles:
   - Set `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` environment variables
   - Or use an IAM role when running on AWS EC2/ECS/EKS
## Custom Storage Backends

You can create custom storage backends by:

1. Subclassing the `StorageBase` abstract class in `storage_base.py`
2. Implementing all required methods
3. Adding your backend to the `storage_factory.py` file

A minimal example of such a subclass is sketched below.
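The backend class here is hypothetical and the method set is inferred from the bundled backends; treat `storage_base.py` as the authoritative contract:

```python
import json

from .storage_base import StorageBase

class RedisStorage(StorageBase):
    """Hypothetical example backend storing everything under one Redis key space."""

    def __init__(self, datastore_path, include_default_watches=True, version_tag="0.0.0"):
        import redis  # requires the third-party 'redis' package
        self.client = redis.Redis.from_url(datastore_path)

    def load_data(self):
        raw = self.client.get('app_data')
        return json.loads(raw) if raw else None

    def save_data(self, data):
        self.client.set('app_data', json.dumps(data))

    # ...implement the remaining StorageBase methods (history, screenshots, etc.) similarly
```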
1
changedetectionio/storage/__init__.py
Normal file
@@ -0,0 +1 @@
# This module contains storage backend implementations
449
changedetectionio/storage/filesystem_storage.py
Normal file
@@ -0,0 +1,449 @@
import os
import shutil
import json
import brotli
import zlib
import pathlib
from loguru import logger
from os import path

from .storage_base import StorageBase


class FileSystemStorage(StorageBase):
    """File system storage backend"""

    def __init__(self, datastore_path, include_default_watches=True, version_tag="0.0.0"):
        """Initialize the file system storage backend

        Args:
            datastore_path (str): Path to the datastore
            include_default_watches (bool): Whether to include default watches
            version_tag (str): Version tag
        """
        self.datastore_path = datastore_path
        self.json_store_path = "{}/url-watches.json".format(self.datastore_path)
        logger.info(f"Datastore path is '{self.json_store_path}'")

    def load_data(self):
        """Load data from the file system

        Returns:
            dict: The loaded data
        """
        if not path.isfile(self.json_store_path):
            return None

        with open(self.json_store_path) as json_file:
            return json.load(json_file)

    def save_data(self, data):
        """Save data to the file system

        Args:
            data (dict): The data to save
        """
        try:
            # Re #286 - First write to a temp file, then confirm it looks OK and rename it
            # This is a fairly basic strategy to deal with the case that the file is corrupted,
            # system was out of memory, out of RAM etc
            with open(self.json_store_path + ".tmp", 'w') as json_file:
                json.dump(data, json_file, indent=4)
            os.replace(self.json_store_path + ".tmp", self.json_store_path)
        except Exception as e:
            logger.error(f"Error writing JSON!! (Main JSON file save was skipped) : {str(e)}")
            raise e

    def get_watch_dir(self, watch_uuid):
        """Get the directory for a watch

        Args:
            watch_uuid (str): Watch UUID

        Returns:
            str: The watch directory
        """
        return os.path.join(self.datastore_path, watch_uuid)

    def ensure_data_dir_exists(self, watch_uuid):
        """Ensure the data directory exists for a watch

        Args:
            watch_uuid (str): Watch UUID
        """
        watch_dir = self.get_watch_dir(watch_uuid)
        if not os.path.isdir(watch_dir):
            logger.debug(f"> Creating data dir {watch_dir}")
            os.makedirs(watch_dir, exist_ok=True)

    def save_history_text(self, watch_uuid, contents, timestamp, snapshot_id):
        """Save history text to the file system

        Args:
            watch_uuid (str): Watch UUID
            contents (str): Contents to save
            timestamp (int): Timestamp
            snapshot_id (str): Snapshot ID

        Returns:
            str: Snapshot filename
        """
        self.ensure_data_dir_exists(watch_uuid)

        threshold = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024))
        skip_brotli = os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False').lower() in ('true', '1', 't')

        watch_dir = self.get_watch_dir(watch_uuid)

        if not skip_brotli and len(contents) > threshold:
            snapshot_fname = f"{snapshot_id}.txt.br"
            dest = os.path.join(watch_dir, snapshot_fname)
            if not os.path.exists(dest):
                with open(dest, 'wb') as f:
                    f.write(brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT))
        else:
            snapshot_fname = f"{snapshot_id}.txt"
            dest = os.path.join(watch_dir, snapshot_fname)
            if not os.path.exists(dest):
                with open(dest, 'wb') as f:
                    f.write(contents.encode('utf-8'))

        # Append to index
        index_fname = os.path.join(watch_dir, "history.txt")
        with open(index_fname, 'a') as f:
            f.write("{},{}\n".format(timestamp, snapshot_fname))

        return snapshot_fname

    def get_history(self, watch_uuid):
        """Get history for a watch

        Args:
            watch_uuid (str): Watch UUID

        Returns:
            dict: The history with timestamp keys and snapshot IDs as values
        """
        tmp_history = {}

        watch_dir = self.get_watch_dir(watch_uuid)
        if not os.path.isdir(watch_dir):
            return tmp_history

        # Read the history file as a dict
        fname = os.path.join(watch_dir, "history.txt")
        if os.path.isfile(fname):
            logger.debug(f"Reading watch history index for {watch_uuid}")
            with open(fname, "r") as f:
                for i in f.readlines():
                    if ',' in i:
                        k, v = i.strip().split(',', 1)  # Split only on the first comma

                        # The index history could contain a relative path, so we need to make the fullpath
                        # so that python can read it
                        if not '/' in v and not '\'' in v:
                            v = os.path.join(watch_dir, v)
                        else:
                            # It's possible that they moved the datadir on older versions
                            # So the snapshot exists but is in a different path
                            snapshot_fname = v.split('/')[-1]
                            proposed_new_path = os.path.join(watch_dir, snapshot_fname)
                            if not os.path.exists(v) and os.path.exists(proposed_new_path):
                                v = proposed_new_path

                        tmp_history[k] = v

        return tmp_history

    def get_history_snapshot(self, watch_uuid, timestamp):
        """Get a history snapshot from the file system

        Args:
            watch_uuid (str): Watch UUID
            timestamp (int): Timestamp

        Returns:
            str: The snapshot content
        """
        history = self.get_history(watch_uuid)
        if not timestamp in history:
            return None

        filepath = history[timestamp]

        # See if a brotli version exists and switch to that
        if not filepath.endswith('.br') and os.path.isfile(f"{filepath}.br"):
            filepath = f"{filepath}.br"

        # OR in the backup case that the .br does not exist, but the plain one does
        if filepath.endswith('.br') and not os.path.isfile(filepath):
            if os.path.isfile(filepath.replace('.br', '')):
                filepath = filepath.replace('.br', '')

        if filepath.endswith('.br'):
            # Brotli doesn't have a file header to detect it, so we rely on the filename
            # https://www.rfc-editor.org/rfc/rfc7932
            with open(filepath, 'rb') as f:
                return(brotli.decompress(f.read()).decode('utf-8'))

        with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
            return f.read()

    def save_screenshot(self, watch_uuid, screenshot, as_error=False):
        """Save a screenshot for a watch

        Args:
            watch_uuid (str): Watch UUID
            screenshot (bytes): Screenshot data
            as_error (bool): Whether this is an error screenshot
        """
        self.ensure_data_dir_exists(watch_uuid)
        watch_dir = self.get_watch_dir(watch_uuid)

        if as_error:
            target_path = os.path.join(watch_dir, "last-error-screenshot.png")
        else:
            target_path = os.path.join(watch_dir, "last-screenshot.png")

        with open(target_path, 'wb') as f:
            f.write(screenshot)

    def get_screenshot(self, watch_uuid, is_error=False):
        """Get a screenshot for a watch

        Args:
            watch_uuid (str): Watch UUID
            is_error (bool): Whether to get the error screenshot

        Returns:
            str or None: The screenshot path or None if not available
        """
        watch_dir = self.get_watch_dir(watch_uuid)

        if is_error:
            fname = os.path.join(watch_dir, "last-error-screenshot.png")
        else:
            fname = os.path.join(watch_dir, "last-screenshot.png")

        if os.path.isfile(fname):
            return fname

        return None

    def save_error_text(self, watch_uuid, contents):
        """Save error text for a watch

        Args:
            watch_uuid (str): Watch UUID
            contents (str): Error contents
        """
        self.ensure_data_dir_exists(watch_uuid)
        watch_dir = self.get_watch_dir(watch_uuid)

        target_path = os.path.join(watch_dir, "last-error.txt")
        with open(target_path, 'w', encoding='utf-8') as f:
            f.write(contents)

    def get_error_text(self, watch_uuid):
        """Get error text for a watch

        Args:
            watch_uuid (str): Watch UUID

        Returns:
            str or False: The error text or False if not available
        """
        watch_dir = self.get_watch_dir(watch_uuid)
        fname = os.path.join(watch_dir, "last-error.txt")

        if os.path.isfile(fname):
            with open(fname, 'r') as f:
                return f.read()

        return False

    def save_xpath_data(self, watch_uuid, data, as_error=False):
        """Save XPath data for a watch

        Args:
            watch_uuid (str): Watch UUID
            data (dict): XPath data
            as_error (bool): Whether this is error data
        """
        self.ensure_data_dir_exists(watch_uuid)
        watch_dir = self.get_watch_dir(watch_uuid)

        if as_error:
            target_path = os.path.join(watch_dir, "elements-error.deflate")
        else:
            target_path = os.path.join(watch_dir, "elements.deflate")

        with open(target_path, 'wb') as f:
            f.write(zlib.compress(json.dumps(data).encode()))

    def get_xpath_data(self, watch_uuid, is_error=False):
        """Get XPath data for a watch

        Args:
            watch_uuid (str): Watch UUID
            is_error (bool): Whether to get error data

        Returns:
            dict or None: The XPath data or None if not available
        """
        watch_dir = self.get_watch_dir(watch_uuid)

        if is_error:
            path = os.path.join(watch_dir, "elements-error.deflate")
        else:
            path = os.path.join(watch_dir, "elements.deflate")

        if not os.path.isfile(path):
            return None

        with open(path, 'rb') as f:
            return json.loads(zlib.decompress(f.read()).decode('utf-8'))

    def save_last_fetched_html(self, watch_uuid, timestamp, contents):
        """Save last fetched HTML for a watch

        Args:
            watch_uuid (str): Watch UUID
            timestamp (int): Timestamp
            contents (str): HTML contents
        """
        self.ensure_data_dir_exists(watch_uuid)
        watch_dir = self.get_watch_dir(watch_uuid)

        snapshot_fname = f"{timestamp}.html.br"
        filepath = os.path.join(watch_dir, snapshot_fname)

        with open(filepath, 'wb') as f:
            contents = contents.encode('utf-8') if isinstance(contents, str) else contents
            try:
                f.write(brotli.compress(contents))
            except Exception as e:
                logger.warning(f"{watch_uuid} - Unable to compress snapshot, saving as raw data to {filepath}")
                logger.warning(e)
                f.write(contents)

        # Prune old snapshots - keep only the newest 2
        self._prune_last_fetched_html_snapshots(watch_uuid)

    def _prune_last_fetched_html_snapshots(self, watch_uuid):
        """Prune old HTML snapshots

        Args:
            watch_uuid (str): Watch UUID
        """
        watch_dir = self.get_watch_dir(watch_uuid)
        history = self.get_history(watch_uuid)

        dates = list(history.keys())
        dates.reverse()

        for index, timestamp in enumerate(dates):
            snapshot_fname = f"{timestamp}.html.br"
            filepath = os.path.join(watch_dir, snapshot_fname)

            # Keep only the first 2
            if index > 1 and os.path.isfile(filepath):
                os.remove(filepath)

    def get_fetched_html(self, watch_uuid, timestamp):
        """Get fetched HTML for a watch

        Args:
            watch_uuid (str): Watch UUID
            timestamp (int): Timestamp

        Returns:
            str or False: The HTML or False if not available
        """
        watch_dir = self.get_watch_dir(watch_uuid)

        snapshot_fname = f"{timestamp}.html.br"
        filepath = os.path.join(watch_dir, snapshot_fname)

        if os.path.isfile(filepath):
            with open(filepath, 'rb') as f:
                return brotli.decompress(f.read()).decode('utf-8')

        return False

    def save_last_text_fetched_before_filters(self, watch_uuid, contents):
        """Save the last text fetched before filters

        Args:
            watch_uuid (str): Watch UUID
            contents (str): Text contents
        """
        self.ensure_data_dir_exists(watch_uuid)
        watch_dir = self.get_watch_dir(watch_uuid)

        filepath = os.path.join(watch_dir, 'last-fetched.br')
        with open(filepath, 'wb') as f:
            f.write(brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT))

    def get_last_fetched_text_before_filters(self, watch_uuid):
        """Get the last text fetched before filters

        Args:
            watch_uuid (str): Watch UUID

        Returns:
            str: The text
        """
        watch_dir = self.get_watch_dir(watch_uuid)
        filepath = os.path.join(watch_dir, 'last-fetched.br')

        if not os.path.isfile(filepath):
            # If a previous attempt doesn't yet exist, just snarf the previous snapshot instead
            history = self.get_history(watch_uuid)
            dates = list(history.keys())

            if len(dates):
                return self.get_history_snapshot(watch_uuid, dates[-1])
            else:
                return ''

        with open(filepath, 'rb') as f:
            return brotli.decompress(f.read()).decode('utf-8')

    def visualselector_data_is_ready(self, watch_uuid):
        """Check if visual selector data is ready

        Args:
            watch_uuid (str): Watch UUID

        Returns:
            bool: Whether visual selector data is ready
        """
        watch_dir = self.get_watch_dir(watch_uuid)
        screenshot_filename = os.path.join(watch_dir, "last-screenshot.png")
        elements_index_filename = os.path.join(watch_dir, "elements.deflate")

        return path.isfile(screenshot_filename) and path.isfile(elements_index_filename)

    def clear_watch_history(self, watch_uuid):
        """Clear history for a watch

        Args:
            watch_uuid (str): Watch UUID
        """
        watch_dir = self.get_watch_dir(watch_uuid)
        if not os.path.exists(watch_dir):
            return

        # Delete all files but keep the directory
        for item in pathlib.Path(watch_dir).glob("*.*"):
            os.unlink(item)

    def delete_watch(self, watch_uuid):
        """Delete a watch

        Args:
            watch_uuid (str): Watch UUID
        """
        watch_dir = self.get_watch_dir(watch_uuid)
        if os.path.exists(watch_dir):
            shutil.rmtree(watch_dir)
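The filesystem backend can be exercised on its own, which is handy for tests; a minimal session under stated assumptions (the watch UUID, snapshot ID, and contents below are made up, and the short text stays under the brotli threshold so it is saved as plain `.txt`):

```python
import tempfile

from changedetectionio.storage.filesystem_storage import FileSystemStorage

datastore = tempfile.mkdtemp()
storage = FileSystemStorage(datastore)

uuid = "demo-watch"
storage.save_history_text(uuid, "price: 19.99", timestamp=1700000000, snapshot_id="abc123")

history = storage.get_history(uuid)  # {'1700000000': '<datastore>/demo-watch/abc123.txt'}
print(storage.get_history_snapshot(uuid, "1700000000"))  # "price: 19.99"
```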
466
changedetectionio/storage/mongodb_storage.py
Normal file
@@ -0,0 +1,466 @@
import os
from copy import deepcopy

import brotli
import zlib
import json
import time
from loguru import logger
from pymongo import MongoClient
from urllib.parse import urlparse
import base64

from .storage_base import StorageBase


class MongoDBStorage(StorageBase):
    """MongoDB storage backend"""

    def __init__(self, datastore_path, include_default_watches=True, version_tag="0.0.0"):
        """Initialize the MongoDB storage backend

        Args:
            datastore_path (str): MongoDB connection URI
            include_default_watches (bool): Whether to include default watches
            version_tag (str): Version tag
        """
        # Parse MongoDB URI from datastore_path
        parsed_uri = urlparse(datastore_path)
        self.db_name = parsed_uri.path.lstrip('/')
        if not self.db_name:
            self.db_name = 'changedetection'

        # Connect to MongoDB
        self.client = MongoClient(datastore_path)
        self.db = self.client[self.db_name]

        # Collections
        self.app_collection = self.db['app']
        self.watches_collection = self.db['watches']
        self.snapshots_collection = self.db['snapshots']
        self.history_collection = self.db['history']
        self.error_collection = self.db['errors']
        self.xpath_collection = self.db['xpath']
        self.html_collection = self.db['html']

        logger.info(f"MongoDB storage initialized, connected to {datastore_path}")

    def load_data(self):
        """Load data from MongoDB

        Returns:
            dict: The loaded data
        """
        app_data = self.app_collection.find_one({'_id': 'app_data'})
        if not app_data:
            return None

        # Remove MongoDB _id field
        if '_id' in app_data:
            del app_data['_id']

        return app_data

    def save_data(self, data):
        """Save data to MongoDB

        Args:
            data (dict): The data to save
        """
        try:
            # Create a copy to modify
            data_copy = deepcopy(data)

            # Set _id for app data
            data_copy['_id'] = 'app_data'

            # Insert or update app data
            self.app_collection.replace_one({'_id': 'app_data'}, data_copy, upsert=True)

            # Also store watches separately for more granular access
            # This provides a safety net in case of corrupted app_data
            watches = data.get('watching', {})
            for uuid, watch in watches.items():
                if isinstance(watch, dict):  # Handle case where watch is a Watch object
                    watch_copy = deepcopy(dict(watch))
                else:
                    watch_copy = deepcopy(watch)
                watch_copy['_id'] = uuid
                self.watches_collection.replace_one({'_id': uuid}, watch_copy, upsert=True)

            # Also store tags separately
            if 'settings' in data and 'application' in data['settings'] and 'tags' in data['settings']['application']:
                tags = data['settings']['application']['tags']
                for uuid, tag in tags.items():
                    if isinstance(tag, dict):  # Handle case where tag is a Tag object
                        tag_copy = deepcopy(dict(tag))
                    else:
                        tag_copy = deepcopy(tag)
                    tag_copy['_id'] = uuid
                    self.db['tags'].replace_one({'_id': uuid}, tag_copy, upsert=True)

        except Exception as e:
            logger.error(f"Error writing to MongoDB: {str(e)}")
            raise e

    def ensure_data_dir_exists(self, watch_uuid):
        """Ensure the data directory exists for a watch

        Args:
            watch_uuid (str): Watch UUID
        """
        # MongoDB doesn't need directories, this is a no-op
        pass

    def save_history_text(self, watch_uuid, contents, timestamp, snapshot_id):
        """Save history text to MongoDB

        Args:
            watch_uuid (str): Watch UUID
            contents (str): Contents to save
            timestamp (int): Timestamp
            snapshot_id (str): Snapshot ID

        Returns:
            str: Snapshot ID
        """
        # Compress the contents
        compressed_contents = brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT)

        # Store the snapshot
        snapshot_data = {
            '_id': f"{watch_uuid}:{timestamp}",
            'watch_uuid': watch_uuid,
            'timestamp': timestamp,
            'snapshot_id': snapshot_id,
            'contents': base64.b64encode(compressed_contents).decode('ascii'),
            'compressed': True
        }

        self.snapshots_collection.replace_one({'_id': snapshot_data['_id']}, snapshot_data, upsert=True)

        # Update history index
        history_entry = {
            'watch_uuid': watch_uuid,
            'timestamp': timestamp,
            'snapshot_id': snapshot_id
        }

        self.history_collection.replace_one(
            {'watch_uuid': watch_uuid, 'timestamp': timestamp},
            history_entry,
            upsert=True
        )

        return snapshot_id

    def get_history(self, watch_uuid):
        """Get history for a watch

        Args:
            watch_uuid (str): Watch UUID

        Returns:
            dict: The history with timestamp keys and snapshot IDs as values
        """
        history = {}

        # Query history entries for this watch
        entries = self.history_collection.find({'watch_uuid': watch_uuid}).sort('timestamp', 1)

        for entry in entries:
            history[str(entry['timestamp'])] = entry['snapshot_id']

        return history

    def get_history_snapshot(self, watch_uuid, timestamp):
        """Get a history snapshot from MongoDB

        Args:
            watch_uuid (str): Watch UUID
            timestamp (int): Timestamp

        Returns:
            str: The snapshot content
        """
        # Query for the snapshot
        snapshot = self.snapshots_collection.find_one({'_id': f"{watch_uuid}:{timestamp}"})

        if not snapshot:
            return None

        if snapshot.get('compressed', False):
            # Decompress the contents
            compressed_data = base64.b64decode(snapshot['contents'])
            return brotli.decompress(compressed_data).decode('utf-8')
        else:
            return snapshot['contents']

    def save_screenshot(self, watch_uuid, screenshot, as_error=False):
        """Save a screenshot for a watch

        Args:
            watch_uuid (str): Watch UUID
            screenshot (bytes): Screenshot data
            as_error (bool): Whether this is an error screenshot
        """
        collection_name = 'error_screenshots' if as_error else 'screenshots'
        collection = self.db[collection_name]

        # Encode the screenshot as base64
        encoded_screenshot = base64.b64encode(screenshot).decode('ascii')

        screenshot_data = {
            '_id': watch_uuid,
            'watch_uuid': watch_uuid,
            'screenshot': encoded_screenshot,
            'timestamp': int(time.time())
        }

        collection.replace_one({'_id': watch_uuid}, screenshot_data, upsert=True)

    def get_screenshot(self, watch_uuid, is_error=False):
        """Get a screenshot for a watch

        Args:
            watch_uuid (str): Watch UUID
            is_error (bool): Whether to get the error screenshot

        Returns:
            bytes or None: The screenshot data or None if not available
        """
        collection_name = 'error_screenshots' if is_error else 'screenshots'
        collection = self.db[collection_name]

        screenshot_data = collection.find_one({'_id': watch_uuid})
        if not screenshot_data:
            return None

        # Decode the screenshot from base64
        return base64.b64decode(screenshot_data['screenshot'])

    def save_error_text(self, watch_uuid, contents):
        """Save error text for a watch

        Args:
            watch_uuid (str): Watch UUID
            contents (str): Error contents
        """
        error_data = {
            '_id': watch_uuid,
            'watch_uuid': watch_uuid,
            'error_text': contents,
            'timestamp': int(time.time())
        }

        self.error_collection.replace_one({'_id': watch_uuid}, error_data, upsert=True)

    def get_error_text(self, watch_uuid):
        """Get error text for a watch

        Args:
            watch_uuid (str): Watch UUID

        Returns:
            str or False: The error text or False if not available
        """
        error_data = self.error_collection.find_one({'_id': watch_uuid})
        if not error_data:
            return False

        return error_data['error_text']

    def save_xpath_data(self, watch_uuid, data, as_error=False):
        """Save XPath data for a watch

        Args:
            watch_uuid (str): Watch UUID
            data (dict): XPath data
            as_error (bool): Whether this is error data
        """
        # Compress the data
        compressed_data = zlib.compress(json.dumps(data).encode())

        _id = f"{watch_uuid}:error" if as_error else watch_uuid

        xpath_data = {
            '_id': _id,
            'watch_uuid': watch_uuid,
            'is_error': as_error,
            'data': base64.b64encode(compressed_data).decode('ascii'),
            'timestamp': int(time.time())
        }

        self.xpath_collection.replace_one({'_id': _id}, xpath_data, upsert=True)

    def get_xpath_data(self, watch_uuid, is_error=False):
        """Get XPath data for a watch

        Args:
            watch_uuid (str): Watch UUID
            is_error (bool): Whether to get error data

        Returns:
            dict or None: The XPath data or None if not available
        """
        _id = f"{watch_uuid}:error" if is_error else watch_uuid

        xpath_data = self.xpath_collection.find_one({'_id': _id})
        if not xpath_data:
            return None

        # Decompress the data
        compressed_data = base64.b64decode(xpath_data['data'])
        return json.loads(zlib.decompress(compressed_data).decode('utf-8'))

    def save_last_fetched_html(self, watch_uuid, timestamp, contents):
        """Save last fetched HTML for a watch

        Args:
            watch_uuid (str): Watch UUID
            timestamp (int): Timestamp
            contents (str): HTML contents
        """
        # Compress the contents
        contents_bytes = contents.encode('utf-8') if isinstance(contents, str) else contents
        try:
            compressed_contents = brotli.compress(contents_bytes)
        except Exception as e:
            logger.warning(f"{watch_uuid} - Unable to compress HTML snapshot: {str(e)}")
            compressed_contents = contents_bytes

        html_data = {
            '_id': f"{watch_uuid}:{timestamp}",
            'watch_uuid': watch_uuid,
            'timestamp': timestamp,
            'html': base64.b64encode(compressed_contents).decode('ascii'),
            'compressed': True
        }

        self.html_collection.replace_one({'_id': html_data['_id']}, html_data, upsert=True)

        # Prune old snapshots - keep only the newest 2
        self._prune_last_fetched_html_snapshots(watch_uuid)

    def _prune_last_fetched_html_snapshots(self, watch_uuid):
        """Prune old HTML snapshots

        Args:
            watch_uuid (str): Watch UUID
        """
        # Get all HTML snapshots for this watch, sorted by timestamp descending
        html_snapshots = list(
            self.html_collection.find({'watch_uuid': watch_uuid}).sort('timestamp', -1)
        )

        # Keep only the first 2
        if len(html_snapshots) > 2:
            for snapshot in html_snapshots[2:]:
                self.html_collection.delete_one({'_id': snapshot['_id']})

    def get_fetched_html(self, watch_uuid, timestamp):
        """Get fetched HTML for a watch

        Args:
            watch_uuid (str): Watch UUID
            timestamp (int): Timestamp

        Returns:
            str or False: The HTML or False if not available
        """
        html_data = self.html_collection.find_one({'_id': f"{watch_uuid}:{timestamp}"})

        if not html_data:
            return False

        if html_data.get('compressed', False):
            # Decompress the contents
            compressed_data = base64.b64decode(html_data['html'])
            return brotli.decompress(compressed_data).decode('utf-8')
        else:
            return html_data['html']

    def save_last_text_fetched_before_filters(self, watch_uuid, contents):
        """Save the last text fetched before filters

        Args:
            watch_uuid (str): Watch UUID
            contents (str): Text contents
        """
        # Compress the contents
        compressed_contents = brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT)

        last_fetched_data = {
            '_id': watch_uuid,
            'watch_uuid': watch_uuid,
            'contents': base64.b64encode(compressed_contents).decode('ascii'),
            'timestamp': int(time.time())
        }

        self.db['last_fetched'].replace_one({'_id': watch_uuid}, last_fetched_data, upsert=True)

    def get_last_fetched_text_before_filters(self, watch_uuid):
        """Get the last text fetched before filters

        Args:
            watch_uuid (str): Watch UUID

        Returns:
            str: The text
        """
        last_fetched_data = self.db['last_fetched'].find_one({'_id': watch_uuid})

        if not last_fetched_data:
            # If a previous attempt doesn't yet exist, just snarf the previous snapshot instead
            history = self.get_history(watch_uuid)
            dates = list(history.keys())

            if len(dates):
                return self.get_history_snapshot(watch_uuid, dates[-1])
            else:
                return ''

        # Decompress the contents
        compressed_data = base64.b64decode(last_fetched_data['contents'])
        return brotli.decompress(compressed_data).decode('utf-8')

    def visualselector_data_is_ready(self, watch_uuid):
        """Check if visual selector data is ready

        Args:
            watch_uuid (str): Watch UUID

        Returns:
            bool: Whether visual selector data is ready
        """
        # Check if screenshot and xpath data exist
        screenshot = self.db['screenshots'].find_one({'_id': watch_uuid})
        xpath_data = self.xpath_collection.find_one({'_id': watch_uuid})

        return screenshot is not None and xpath_data is not None

    def clear_watch_history(self, watch_uuid):
        """Clear history for a watch

        Args:
            watch_uuid (str): Watch UUID
        """
        # Delete all snapshots and history for this watch
        self.snapshots_collection.delete_many({'watch_uuid': watch_uuid})
        self.history_collection.delete_many({'watch_uuid': watch_uuid})
        self.html_collection.delete_many({'watch_uuid': watch_uuid})
        self.db['last_fetched'].delete_many({'watch_uuid': watch_uuid})
        self.xpath_collection.delete_many({'watch_uuid': watch_uuid})
        self.db['screenshots'].delete_many({'watch_uuid': watch_uuid})
        self.error_collection.delete_many({'watch_uuid': watch_uuid})

    def delete_watch(self, watch_uuid):
        """Delete a watch

        Args:
            watch_uuid (str): Watch UUID
        """
        # Clear all history data
        self.clear_watch_history(watch_uuid)

        # Also delete error screenshots
        self.db['error_screenshots'].delete_many({'watch_uuid': watch_uuid})
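One design note on this backend: binary payloads (brotli and zlib output) are base64-encoded into ordinary string fields, which keeps the documents JSON-friendly at roughly 33% size overhead; storing raw bytes via `bson.Binary` would be the compact alternative. A usage sketch under stated assumptions (a reachable local MongoDB, and made-up watch/snapshot IDs):

```python
from changedetectionio.storage.mongodb_storage import MongoDBStorage

storage = MongoDBStorage("mongodb://localhost:27017/changedetection")
storage.save_history_text("demo-watch", "price: 19.99", timestamp=1700000000, snapshot_id="abc123")
print(storage.get_history("demo-watch"))  # {'1700000000': 'abc123'}
```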
525
changedetectionio/storage/s3_storage.py
Normal file
@@ -0,0 +1,525 @@
import os
import io
import json
import brotli
import zlib
import time
from loguru import logger
import boto3
from urllib.parse import urlparse
import base64

from .storage_base import StorageBase


class S3Storage(StorageBase):
    """Amazon S3 storage backend"""

    def __init__(self, datastore_path, include_default_watches=True, version_tag="0.0.0"):
        """Initialize the S3 storage backend

        Args:
            datastore_path (str): S3 URI (s3://bucket-name/optional-prefix)
            include_default_watches (bool): Whether to include default watches
            version_tag (str): Version tag
        """
        # Parse S3 URI
        parsed_uri = urlparse(datastore_path)
        self.bucket_name = parsed_uri.netloc
        self.prefix = parsed_uri.path.lstrip('/')

        if self.prefix and not self.prefix.endswith('/'):
            self.prefix += '/'

        # Initialize S3 client
        # Uses AWS credentials from environment variables or IAM role
        self.s3 = boto3.client('s3')

        logger.info(f"S3 storage initialized, using bucket '{self.bucket_name}' with prefix '{self.prefix}'")

    def _get_key(self, path):
        """Get the S3 key for a path

        Args:
            path (str): Path relative to the prefix

        Returns:
            str: The full S3 key
        """
        return f"{self.prefix}{path}"

    def load_data(self):
        """Load data from S3

        Returns:
            dict: The loaded data
        """
        key = self._get_key("app-data.json")

        try:
            response = self.s3.get_object(Bucket=self.bucket_name, Key=key)
            return json.loads(response['Body'].read().decode('utf-8'))
        except self.s3.exceptions.NoSuchKey:
            return None
        except Exception as e:
            logger.error(f"Error loading data from S3: {str(e)}")
            raise e

    def save_data(self, data):
        """Save data to S3

        Args:
            data (dict): The data to save
        """
        try:
            key = self._get_key("app-data.json")
            self.s3.put_object(
                Bucket=self.bucket_name,
                Key=key,
                Body=json.dumps(data, indent=4),
                ContentType='application/json'
            )
        except Exception as e:
            logger.error(f"Error saving data to S3: {str(e)}")
            raise e

    def ensure_data_dir_exists(self, watch_uuid):
        """Ensure the data directory exists for a watch

        Args:
            watch_uuid (str): Watch UUID
        """
        # S3 doesn't need directories, this is a no-op
        pass

    def _get_watch_prefix(self, watch_uuid):
        """Get the S3 prefix for a watch

        Args:
            watch_uuid (str): Watch UUID

        Returns:
            str: The watch prefix
        """
        return self._get_key(f"watches/{watch_uuid}/")

    def save_history_text(self, watch_uuid, contents, timestamp, snapshot_id):
        """Save history text to S3

        Args:
            watch_uuid (str): Watch UUID
            contents (str): Contents to save
            timestamp (int): Timestamp
            snapshot_id (str): Snapshot ID

        Returns:
            str: Snapshot ID
        """
        # Determine if we should compress
        threshold = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024))
        skip_brotli = os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False').lower() in ('true', '1', 't')

        watch_prefix = self._get_watch_prefix(watch_uuid)

        # Save the snapshot
        if not skip_brotli and len(contents) > threshold:
            snapshot_key = f"{watch_prefix}snapshots/{snapshot_id}.txt.br"
            snapshot_fname = f"{snapshot_id}.txt.br"
            compressed_contents = brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT)

            self.s3.put_object(
                Bucket=self.bucket_name,
                Key=snapshot_key,
                Body=compressed_contents
            )
        else:
            snapshot_key = f"{watch_prefix}snapshots/{snapshot_id}.txt"
            snapshot_fname = f"{snapshot_id}.txt"

            self.s3.put_object(
                Bucket=self.bucket_name,
                Key=snapshot_key,
                Body=contents.encode('utf-8')
            )

        # Update history index
        history_key = f"{watch_prefix}history.txt"

        # Try to get existing history first
        try:
            response = self.s3.get_object(Bucket=self.bucket_name, Key=history_key)
            history_content = response['Body'].read().decode('utf-8')
        except self.s3.exceptions.NoSuchKey:
            history_content = ""

        # Append new entry
        history_content += f"{timestamp},{snapshot_fname}\n"

        # Save updated history
        self.s3.put_object(
            Bucket=self.bucket_name,
            Key=history_key,
            Body=history_content
        )

        return snapshot_fname

    def get_history(self, watch_uuid):
        """Get history for a watch

        Args:
            watch_uuid (str): Watch UUID

        Returns:
            dict: The history with timestamp keys and snapshot IDs as values
        """
        tmp_history = {}
        watch_prefix = self._get_watch_prefix(watch_uuid)
        history_key = f"{watch_prefix}history.txt"

        try:
            response = self.s3.get_object(Bucket=self.bucket_name, Key=history_key)
            history_content = response['Body'].read().decode('utf-8')

            for line in history_content.splitlines():
                if ',' in line:
                    k, v = line.strip().split(',', 1)  # Split only on the first comma
                    tmp_history[k] = f"{watch_prefix}snapshots/{v}"

            return tmp_history
        except self.s3.exceptions.NoSuchKey:
            return {}

    def get_history_snapshot(self, watch_uuid, timestamp):
        """Get a history snapshot from S3

        Args:
            watch_uuid (str): Watch UUID
            timestamp (int): Timestamp

        Returns:
            str: The snapshot content
        """
        history = self.get_history(watch_uuid)
        if not timestamp in history:
            return None

        key = history[timestamp]

        try:
            response = self.s3.get_object(Bucket=self.bucket_name, Key=key)
            content = response['Body'].read()

            if key.endswith('.br'):
                # Decompress brotli
                return brotli.decompress(content).decode('utf-8')
            else:
                return content.decode('utf-8')
        except Exception as e:
            logger.error(f"Error reading snapshot from S3: {str(e)}")
            return None

    def save_screenshot(self, watch_uuid, screenshot, as_error=False):
        """Save a screenshot for a watch

        Args:
            watch_uuid (str): Watch UUID
            screenshot (bytes): Screenshot data
            as_error (bool): Whether this is an error screenshot
        """
        watch_prefix = self._get_watch_prefix(watch_uuid)

        if as_error:
            key = f"{watch_prefix}last-error-screenshot.png"
        else:
            key = f"{watch_prefix}last-screenshot.png"

        self.s3.put_object(
            Bucket=self.bucket_name,
            Key=key,
            Body=screenshot,
            ContentType='image/png'
        )

    def get_screenshot(self, watch_uuid, is_error=False):
        """Get a screenshot for a watch

        Args:
            watch_uuid (str): Watch UUID
            is_error (bool): Whether to get the error screenshot

        Returns:
            bytes or None: The screenshot data or None if not available
        """
        watch_prefix = self._get_watch_prefix(watch_uuid)

        if is_error:
            key = f"{watch_prefix}last-error-screenshot.png"
        else:
            key = f"{watch_prefix}last-screenshot.png"

        try:
            response = self.s3.get_object(Bucket=self.bucket_name, Key=key)
            return response['Body'].read()
        except self.s3.exceptions.NoSuchKey:
            return None

    def save_error_text(self, watch_uuid, contents):
        """Save error text for a watch

        Args:
            watch_uuid (str): Watch UUID
            contents (str): Error contents
        """
        watch_prefix = self._get_watch_prefix(watch_uuid)
        key = f"{watch_prefix}last-error.txt"

        self.s3.put_object(
            Bucket=self.bucket_name,
            Key=key,
            Body=contents.encode('utf-8')
        )

    def get_error_text(self, watch_uuid):
        """Get error text for a watch

        Args:
            watch_uuid (str): Watch UUID

        Returns:
            str or False: The error text or False if not available
        """
        watch_prefix = self._get_watch_prefix(watch_uuid)
        key = f"{watch_prefix}last-error.txt"

        try:
            response = self.s3.get_object(Bucket=self.bucket_name, Key=key)
            return response['Body'].read().decode('utf-8')
        except self.s3.exceptions.NoSuchKey:
            return False

    def save_xpath_data(self, watch_uuid, data, as_error=False):
        """Save XPath data for a watch

        Args:
            watch_uuid (str): Watch UUID
            data (dict): XPath data
            as_error (bool): Whether this is error data
        """
        watch_prefix = self._get_watch_prefix(watch_uuid)

        if as_error:
            key = f"{watch_prefix}elements-error.deflate"
        else:
            key = f"{watch_prefix}elements.deflate"

        compressed_data = zlib.compress(json.dumps(data).encode())

        self.s3.put_object(
            Bucket=self.bucket_name,
            Key=key,
            Body=compressed_data
        )

    def get_xpath_data(self, watch_uuid, is_error=False):
        """Get XPath data for a watch

        Args:
            watch_uuid (str): Watch UUID
            is_error (bool): Whether to get error data

        Returns:
            dict or None: The XPath data or None if not available
        """
        watch_prefix = self._get_watch_prefix(watch_uuid)

        if is_error:
            key = f"{watch_prefix}elements-error.deflate"
        else:
            key = f"{watch_prefix}elements.deflate"

        try:
            response = self.s3.get_object(Bucket=self.bucket_name, Key=key)
            compressed_data = response['Body'].read()
            return json.loads(zlib.decompress(compressed_data).decode('utf-8'))
        except self.s3.exceptions.NoSuchKey:
            return None

    def save_last_fetched_html(self, watch_uuid, timestamp, contents):
        """Save last fetched HTML for a watch

        Args:
            watch_uuid (str): Watch UUID
            timestamp (int): Timestamp
            contents (str): HTML contents
        """
        watch_prefix = self._get_watch_prefix(watch_uuid)
        key = f"{watch_prefix}html/{timestamp}.html.br"

        contents_bytes = contents.encode('utf-8') if isinstance(contents, str) else contents
        try:
            compressed_contents = brotli.compress(contents_bytes)
        except Exception as e:
            logger.warning(f"{watch_uuid} - Unable to compress HTML snapshot: {str(e)}")
            compressed_contents = contents_bytes

        self.s3.put_object(
            Bucket=self.bucket_name,
            Key=key,
            Body=compressed_contents
        )

        # Prune old snapshots - keep only the newest 2
        self._prune_last_fetched_html_snapshots(watch_uuid)

    def _prune_last_fetched_html_snapshots(self, watch_uuid):
        """Prune old HTML snapshots

        Args:
            watch_uuid (str): Watch UUID
        """
        watch_prefix = self._get_watch_prefix(watch_uuid)
        html_prefix = f"{watch_prefix}html/"

        # List all HTML snapshots
        response = self.s3.list_objects_v2(
            Bucket=self.bucket_name,
            Prefix=html_prefix
        )

        if 'Contents' not in response:
            return

        # Sort by timestamp (extract from key)
        html_files = sorted(
            response['Contents'],
            key=lambda x: int(x['Key'].split('/')[-1].split('.')[0]),
            reverse=True
        )

        # Delete all but the newest 2
        if len(html_files) > 2:
            for file in html_files[2:]:
                self.s3.delete_object(
                    Bucket=self.bucket_name,
                    Key=file['Key']
                )

    def get_fetched_html(self, watch_uuid, timestamp):
        """Get fetched HTML for a watch

        Args:
            watch_uuid (str): Watch UUID
            timestamp (int): Timestamp

        Returns:
            str or False: The HTML or False if not available
        """
        watch_prefix = self._get_watch_prefix(watch_uuid)
        key = f"{watch_prefix}html/{timestamp}.html.br"

        try:
            response = self.s3.get_object(Bucket=self.bucket_name, Key=key)
            compressed_data = response['Body'].read()
            return brotli.decompress(compressed_data).decode('utf-8')
        except self.s3.exceptions.NoSuchKey:
            return False

    def save_last_text_fetched_before_filters(self, watch_uuid, contents):
        """Save the last text fetched before filters

        Args:
            watch_uuid (str): Watch UUID
            contents (str): Text contents
        """
        watch_prefix = self._get_watch_prefix(watch_uuid)
        key = f"{watch_prefix}last-fetched.br"

        compressed_contents = brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT)

        self.s3.put_object(
            Bucket=self.bucket_name,
            Key=key,
            Body=compressed_contents
)
|
||||
|
||||
def get_last_fetched_text_before_filters(self, watch_uuid):
|
||||
"""Get the last text fetched before filters
|
||||
|
||||
Args:
|
||||
watch_uuid (str): Watch UUID
|
||||
|
||||
Returns:
|
||||
str: The text
|
||||
"""
|
||||
watch_prefix = self._get_watch_prefix(watch_uuid)
|
||||
key = f"{watch_prefix}last-fetched.br"
|
||||
|
||||
try:
|
||||
response = self.s3.get_object(Bucket=self.bucket_name, Key=key)
|
||||
compressed_data = response['Body'].read()
|
||||
return brotli.decompress(compressed_data).decode('utf-8')
|
||||
except self.s3.exceptions.NoSuchKey:
|
||||
# If a previous attempt doesnt yet exist, just snarf the previous snapshot instead
|
||||
history = self.get_history(watch_uuid)
|
||||
dates = list(history.keys())
|
||||
|
||||
if len(dates):
|
||||
return self.get_history_snapshot(watch_uuid, dates[-1])
|
||||
else:
|
||||
return ''
|
||||
|
||||
def visualselector_data_is_ready(self, watch_uuid):
|
||||
"""Check if visual selector data is ready
|
||||
|
||||
Args:
|
||||
watch_uuid (str): Watch UUID
|
||||
|
||||
Returns:
|
||||
bool: Whether visual selector data is ready
|
||||
"""
|
||||
watch_prefix = self._get_watch_prefix(watch_uuid)
|
||||
screenshot_key = f"{watch_prefix}last-screenshot.png"
|
||||
elements_key = f"{watch_prefix}elements.deflate"
|
||||
|
||||
try:
|
||||
# Just check if both files exist
|
||||
self.s3.head_object(Bucket=self.bucket_name, Key=screenshot_key)
|
||||
self.s3.head_object(Bucket=self.bucket_name, Key=elements_key)
|
||||
return True
|
||||
except self.s3.exceptions.ClientError:
|
||||
return False
|
||||
|
||||
def clear_watch_history(self, watch_uuid):
|
||||
"""Clear history for a watch
|
||||
|
||||
Args:
|
||||
watch_uuid (str): Watch UUID
|
||||
"""
|
||||
watch_prefix = self._get_watch_prefix(watch_uuid)
|
||||
|
||||
# List all objects with this watch's prefix
|
||||
paginator = self.s3.get_paginator('list_objects_v2')
|
||||
pages = paginator.paginate(
|
||||
Bucket=self.bucket_name,
|
||||
Prefix=watch_prefix
|
||||
)
|
||||
|
||||
# Delete all objects in batches
|
||||
for page in pages:
|
||||
if 'Contents' not in page:
|
||||
continue
|
||||
|
||||
delete_keys = {'Objects': [{'Key': obj['Key']} for obj in page['Contents']]}
|
||||
self.s3.delete_objects(
|
||||
Bucket=self.bucket_name,
|
||||
Delete=delete_keys
|
||||
)
|
||||
|
||||
def delete_watch(self, watch_uuid):
|
||||
"""Delete a watch
|
||||
|
||||
Args:
|
||||
watch_uuid (str): Watch UUID
|
||||
"""
|
||||
# Same implementation as clear_watch_history for S3
|
||||
self.clear_watch_history(watch_uuid)
|
||||
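For orientation, a minimal usage sketch of the backend above (not part of this PR). The bucket name and watch UUID are hypothetical, and it assumes boto3 credentials are already configured in the environment and that the constructor earlier in this file parses the bucket out of the s3:// URI:

# Sketch only. Network calls require valid AWS credentials and an existing bucket.
storage = S3Storage('s3://changedetection-example-bucket', include_default_watches=False)

uuid = 'deadbeef-0000-0000-0000-000000000000'  # hypothetical watch UUID
storage.save_error_text(uuid, 'Exception while fetching page')
storage.save_last_fetched_html(uuid, timestamp=1700000000, contents='<html>...</html>')

# Reads mirror the filesystem backend: None/False when the object is absent
html = storage.get_fetched_html(uuid, timestamp=1700000000)   # str or False
ready = storage.visualselector_data_is_ready(uuid)            # bool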
230
changedetectionio/storage/storage_base.py
Normal file
@@ -0,0 +1,230 @@
from abc import ABC, abstractmethod
import json
from loguru import logger


class StorageBase(ABC):
    """Abstract base class for storage backends"""

    @abstractmethod
    def __init__(self, datastore_path, include_default_watches=True, version_tag="0.0.0"):
        """Initialize the storage backend

        Args:
            datastore_path (str): Path to the datastore
            include_default_watches (bool): Whether to include default watches
            version_tag (str): Version tag
        """
        pass

    @abstractmethod
    def load_data(self):
        """Load data from the storage backend

        Returns:
            dict: The loaded data
        """
        pass

    @abstractmethod
    def save_data(self, data):
        """Save data to the storage backend

        Args:
            data (dict): The data to save
        """
        pass

    @abstractmethod
    def save_history_text(self, watch_uuid, contents, timestamp, snapshot_id):
        """Save history text to the storage backend

        Args:
            watch_uuid (str): Watch UUID
            contents (str): Contents to save
            timestamp (int): Timestamp
            snapshot_id (str): Snapshot ID

        Returns:
            str: Snapshot filename or ID
        """
        pass

    @abstractmethod
    def get_history_snapshot(self, watch_uuid, timestamp):
        """Get a history snapshot from the storage backend

        Args:
            watch_uuid (str): Watch UUID
            timestamp (int): Timestamp

        Returns:
            str: The snapshot content
        """
        pass

    @abstractmethod
    def get_history(self, watch_uuid):
        """Get history for a watch

        Args:
            watch_uuid (str): Watch UUID

        Returns:
            dict: The history with timestamp keys and snapshot IDs as values
        """
        pass

    @abstractmethod
    def save_screenshot(self, watch_uuid, screenshot, as_error=False):
        """Save a screenshot for a watch

        Args:
            watch_uuid (str): Watch UUID
            screenshot (bytes): Screenshot data
            as_error (bool): Whether this is an error screenshot
        """
        pass

    @abstractmethod
    def get_screenshot(self, watch_uuid, is_error=False):
        """Get a screenshot for a watch

        Args:
            watch_uuid (str): Watch UUID
            is_error (bool): Whether to get the error screenshot

        Returns:
            bytes or str or None: The screenshot data or path (backend-dependent), or None if not available
        """
        pass

    @abstractmethod
    def save_error_text(self, watch_uuid, contents):
        """Save error text for a watch

        Args:
            watch_uuid (str): Watch UUID
            contents (str): Error contents
        """
        pass

    @abstractmethod
    def get_error_text(self, watch_uuid):
        """Get error text for a watch

        Args:
            watch_uuid (str): Watch UUID

        Returns:
            str or False: The error text or False if not available
        """
        pass

    @abstractmethod
    def save_xpath_data(self, watch_uuid, data, as_error=False):
        """Save XPath data for a watch

        Args:
            watch_uuid (str): Watch UUID
            data (dict): XPath data
            as_error (bool): Whether this is error data
        """
        pass

    @abstractmethod
    def get_xpath_data(self, watch_uuid, is_error=False):
        """Get XPath data for a watch

        Args:
            watch_uuid (str): Watch UUID
            is_error (bool): Whether to get error data

        Returns:
            dict or None: The XPath data or None if not available
        """
        pass

    @abstractmethod
    def save_last_fetched_html(self, watch_uuid, timestamp, contents):
        """Save last fetched HTML for a watch

        Args:
            watch_uuid (str): Watch UUID
            timestamp (int): Timestamp
            contents (str): HTML contents
        """
        pass

    @abstractmethod
    def get_fetched_html(self, watch_uuid, timestamp):
        """Get fetched HTML for a watch

        Args:
            watch_uuid (str): Watch UUID
            timestamp (int): Timestamp

        Returns:
            str or False: The HTML or False if not available
        """
        pass

    @abstractmethod
    def save_last_text_fetched_before_filters(self, watch_uuid, contents):
        """Save the last text fetched before filters

        Args:
            watch_uuid (str): Watch UUID
            contents (str): Text contents
        """
        pass

    @abstractmethod
    def get_last_fetched_text_before_filters(self, watch_uuid):
        """Get the last text fetched before filters

        Args:
            watch_uuid (str): Watch UUID

        Returns:
            str: The text
        """
        pass

    @abstractmethod
    def ensure_data_dir_exists(self, watch_uuid):
        """Ensure the data directory exists for a watch

        Args:
            watch_uuid (str): Watch UUID
        """
        pass

    @abstractmethod
    def visualselector_data_is_ready(self, watch_uuid):
        """Check if visual selector data is ready

        Args:
            watch_uuid (str): Watch UUID

        Returns:
            bool: Whether visual selector data is ready
        """
        pass

    @abstractmethod
    def clear_watch_history(self, watch_uuid):
        """Clear history for a watch

        Args:
            watch_uuid (str): Watch UUID
        """
        pass

    @abstractmethod
    def delete_watch(self, watch_uuid):
        """Delete a watch

        Args:
            watch_uuid (str): Watch UUID
        """
        pass
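One consequence of the ABC/@abstractmethod pairing above: a backend that forgets to implement any one of these methods fails at construction time rather than at first use. A tiny self-contained demonstration of that behaviour (class names here are illustrative, not from the PR):

from abc import ABC, abstractmethod

class DemoBase(ABC):
    @abstractmethod
    def load_data(self):
        pass

class IncompleteBackend(DemoBase):
    pass  # load_data deliberately not implemented

try:
    IncompleteBackend()
except TypeError as e:
    # "Can't instantiate abstract class IncompleteBackend ..." - caught up front
    print(e)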
33
changedetectionio/storage/storage_factory.py
Normal file
@@ -0,0 +1,33 @@
import re
from loguru import logger
from urllib.parse import urlparse

from .storage_base import StorageBase
from .filesystem_storage import FileSystemStorage
from .mongodb_storage import MongoDBStorage
from .s3_storage import S3Storage

def create_storage(datastore_path, include_default_watches=True, version_tag="0.0.0"):
    """Create a storage backend based on the datastore path

    Args:
        datastore_path (str): Path to the datastore
        include_default_watches (bool): Whether to include default watches
        version_tag (str): Version tag

    Returns:
        StorageBase: The storage backend
    """
    # Check if it's a MongoDB URI
    if datastore_path.startswith('mongodb://') or datastore_path.startswith('mongodb+srv://'):
        logger.info(f"Using MongoDB storage backend with URI {datastore_path}")
        return MongoDBStorage(datastore_path, include_default_watches, version_tag)

    # Check if it's an S3 URI
    if datastore_path.startswith('s3://'):
        logger.info(f"Using S3 storage backend with URI {datastore_path}")
        return S3Storage(datastore_path, include_default_watches, version_tag)

    # Default to filesystem
    logger.info(f"Using filesystem storage backend with path {datastore_path}")
    return FileSystemStorage(datastore_path, include_default_watches, version_tag)
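For orientation, how the factory above dispatches on the datastore path. The URIs are hypothetical examples, and each branch assumes its backend's dependency (pymongo, boto3) is installed and reachable:

from changedetectionio.storage.storage_factory import create_storage

fs_backend = create_storage('/datastore')                           # -> FileSystemStorage
mongo_backend = create_storage('mongodb://localhost:27017/cdio')    # -> MongoDBStorage
s3_backend = create_storage('s3://changedetection-example-bucket')  # -> S3Storage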
@@ -20,6 +20,7 @@ from loguru import logger

from .processors import get_custom_watch_obj_for_processor
from .processors.restock_diff import Restock
from .storage.storage_factory import create_storage

# Because the server will run as a daemon and won't know the URL for notification links when firing off a notification
BASE_URL_NOT_SET_TEXT = '("Base URL" not set - see settings - notifications)'
@@ -38,20 +39,28 @@ class ChangeDetectionStore:
    needs_write_urgent = False

    __version_check = True

    # Singleton instance for access from Watch class methods
    instance = None

    def __init__(self, datastore_path="/datastore", include_default_watches=True, version_tag="0.0.0"):
        # Should only be active for docker
        # logging.basicConfig(filename='/dev/stdout', level=logging.INFO)
        self.__data = App.model()
        self.datastore_path = datastore_path
        self.json_store_path = "{}/url-watches.json".format(self.datastore_path)
        logger.info(f"Datastore path is '{self.json_store_path}'")

        # Create the appropriate storage backend based on the datastore path
        self.storage = create_storage(datastore_path, include_default_watches, version_tag)

        self.needs_write = False
        self.start_time = time.time()
        self.stop_thread = False
        # Base definition for all watchers
        # deepcopy part of #569 - not sure why it's needed exactly
        self.generic_definition = deepcopy(Watch.model(datastore_path = datastore_path, default={}))
        self.generic_definition = deepcopy(Watch.model(datastore_path=datastore_path, default={}))

        # Set singleton instance
        ChangeDetectionStore.instance = self

        if path.isfile('changedetectionio/source.txt'):
            with open('changedetectionio/source.txt') as f:
@@ -60,10 +69,9 @@ class ChangeDetectionStore:
                self.__data['build_sha'] = f.read()

        try:
            # @todo retest with ", encoding='utf-8'"
            with open(self.json_store_path) as json_file:
                from_disk = json.load(json_file)

            # Load data from storage
            from_disk = self.storage.load_data()
            if from_disk:
                # @todo isn't there a way to do this dict.update recursively?
                # Problem here is if the one on the disk is missing a sub-struct, it won't be present anymore.
                if 'watching' in from_disk:
@@ -91,22 +99,24 @@ class ChangeDetectionStore:
                for uuid, tag in self.__data['settings']['application']['tags'].items():
                    self.__data['settings']['application']['tags'][uuid] = self.rehydrate_entity(uuid, tag, processor_override='restock_diff')
                    logger.info(f"Tag: {uuid} {tag['title']}")
            else:
                # First time ran, Create the datastore.
                if include_default_watches:
                    logger.critical(f"No data store found, creating new store")
                    self.add_watch(url='https://news.ycombinator.com/',
                                   tag='Tech news',
                                   extras={'fetch_backend': 'html_requests'})

        # First time ran, Create the datastore.
        except (FileNotFoundError):
            if include_default_watches:
                logger.critical(f"No JSON DB found at {self.json_store_path}, creating JSON store at {self.datastore_path}")
                self.add_watch(url='https://news.ycombinator.com/',
                               tag='Tech news',
                               extras={'fetch_backend': 'html_requests'})
                    self.add_watch(url='https://changedetection.io/CHANGELOG.txt',
                                   tag='changedetection.io',
                                   extras={'fetch_backend': 'html_requests'})

                self.add_watch(url='https://changedetection.io/CHANGELOG.txt',
                               tag='changedetection.io',
                               extras={'fetch_backend': 'html_requests'})

            updates_available = self.get_updates_available()
            self.__data['settings']['application']['schema_version'] = updates_available.pop()
                updates_available = self.get_updates_available()
                self.__data['settings']['application']['schema_version'] = updates_available.pop()

        except Exception as e:
            logger.error(f"Error loading data from storage: {str(e)}")
            raise e
        else:
            # Bump the update version by running updates
            self.run_updates()
@@ -227,23 +237,15 @@ class ChangeDetectionStore:

    # Delete a single watch by UUID
    def delete(self, uuid):
        import pathlib
        import shutil

        with self.lock:
            if uuid == 'all':
                # Delete all watches
                for watch_uuid in list(self.data['watching'].keys()):
                    self.storage.delete_watch(watch_uuid)
                self.__data['watching'] = {}

                # GitHub #30 also delete history records
                for uuid in self.data['watching']:
                    path = pathlib.Path(os.path.join(self.datastore_path, uuid))
                    if os.path.exists(path):
                        shutil.rmtree(path)

            else:
                path = pathlib.Path(os.path.join(self.datastore_path, uuid))
                if os.path.exists(path):
                    shutil.rmtree(path)
                # Delete a single watch
                self.storage.delete_watch(uuid)
                del self.data['watching'][uuid]

            self.needs_write_urgent = True
@@ -266,6 +268,7 @@ class ChangeDetectionStore:

    # Remove a watch's data but keep the entry (URL etc)
    def clear_watch_history(self, uuid):
        self.storage.clear_watch_history(uuid)
        self.__data['watching'][uuid].clear_watch()
        self.needs_write_urgent = True

@@ -372,43 +375,30 @@ class ChangeDetectionStore:
        return new_uuid

    def visualselector_data_is_ready(self, watch_uuid):
        output_path = "{}/{}".format(self.datastore_path, watch_uuid)
        screenshot_filename = "{}/last-screenshot.png".format(output_path)
        elements_index_filename = "{}/elements.deflate".format(output_path)
        if path.isfile(screenshot_filename) and path.isfile(elements_index_filename) :
            return True

        return False
        return self.storage.visualselector_data_is_ready(watch_uuid)

    def sync_to_json(self):
        logger.info("Saving JSON..")
        logger.info("Saving data to storage backend...")
        try:
            data = deepcopy(self.__data)
        except RuntimeError as e:
            # Try again in 15 seconds
            time.sleep(15)
            logger.error(f"! Data changed when writing to JSON, trying again.. {str(e)}")
            logger.error(f"! Data changed when writing to storage, trying again.. {str(e)}")
            self.sync_to_json()
            return
        else:

            try:
                # Re #286 - First write to a temp file, then confirm it looks OK and rename it
                # This is a fairly basic strategy to deal with the case that the file is corrupted,
                # system was out of memory, out of RAM etc
                with open(self.json_store_path+".tmp", 'w') as json_file:
                    json.dump(data, json_file, indent=4)
                os.replace(self.json_store_path+".tmp", self.json_store_path)
                self.storage.save_data(data)
            except Exception as e:
                logger.error(f"Error writing JSON!! (Main JSON file save was skipped) : {str(e)}")
                logger.error(f"Error writing to storage backend: {str(e)}")

        self.needs_write = False
        self.needs_write_urgent = False

    # Thread runner, this helps with thread/write issues when there are many operations that want to update the JSON
    # Thread runner, this helps with thread/write issues when there are many operations that want to update the data
    # by just running periodically in one thread, according to python, dict updates are threadsafe.
    def save_datastore(self):

        while True:
            if self.stop_thread:
                # Suppressing "Logging error in Loguru Handler #0" during CICD.
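The sync_to_json change above moves the actual write behind storage.save_data(); the removed lines used the write-temp-then-rename strategy from #286, which the filesystem backend presumably keeps. For reference, a minimal sketch of that atomic-write pattern:

import json
import os

def atomic_json_write(data, json_store_path):
    # Write to a temporary file first, then atomically swap it into place,
    # so a crash or out-of-memory condition mid-write never leaves a corrupt JSON file behind
    tmp_path = json_store_path + ".tmp"
    with open(tmp_path, 'w') as json_file:
        json.dump(data, json_file, indent=4)
    os.replace(tmp_path, json_store_path)  # atomic replace on POSIX and Windows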
@@ -12,23 +12,25 @@
        }}
        <div class="pure-form-message-inline">
            <p>
                <strong>Tip:</strong> Use <a target=_new href="https://github.com/caronc/apprise">AppRise Notification URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.<br>
                <strong>Tip:</strong> Use <a target="newwindow" href="https://github.com/caronc/apprise">AppRise Notification URLs</a> for notification to just about any service! <i><a target="newwindow" href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.<br>
            </p>
            <div data-target="#advanced-help-notifications" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div>
            <ul style="display: none" id="advanced-help-notifications">
                <li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_discord">discord://</a></code> (or <code>https://discord.com/api/webhooks...</code>)) only supports a maximum <strong>2,000 characters</strong> of notification text, including the title.</li>
                <li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> bots can't send messages to other bots, so you should specify chat ID of non-bot user.</li>
                <li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> only supports very limited HTML and can fail when extra tags are sent, <a href="https://core.telegram.org/bots/api#html-style">read more here</a> (or use plaintext/markdown format)</li>
                <li><code><a target="newwindow" href="https://github.com/caronc/apprise/wiki/Notify_discord">discord://</a></code> (or <code>https://discord.com/api/webhooks...</code>)) only supports a maximum <strong>2,000 characters</strong> of notification text, including the title.</li>
                <li><code><a target="newwindow" href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> bots can't send messages to other bots, so you should specify chat ID of non-bot user.</li>
                <li><code><a target="newwindow" href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> only supports very limited HTML and can fail when extra tags are sent, <a href="https://core.telegram.org/bots/api#html-style">read more here</a> (or use plaintext/markdown format)</li>
                <li><code>gets://</code>, <code>posts://</code>, <code>puts://</code>, <code>deletes://</code> for direct API calls (or omit the "<code>s</code>" for non-SSL ie <code>get://</code>) <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes#postposts">more help here</a></li>
                <li>Accepts the <code>{{ '{{token}}' }}</code> placeholders listed below</li>
            </ul>
        </div>
        <div class="notifications-wrapper">
            <a id="send-test-notification" class="pure-button button-secondary button-xsmall" >Send test notification</a>
            <a id="send-test-notification" class="pure-button button-secondary button-xsmall" >Send test notification</a> <div class="spinner" style="display: none;"></div>
            {% if emailprefix %}
            <a id="add-email-helper" class="pure-button button-secondary button-xsmall" >Add email <img style="height: 1em; display: inline-block" src="{{url_for('static_content', group='images', filename='email.svg')}}" alt="Add an email address"> </a>
            {% endif %}
            <a href="{{url_for('notification_logs')}}" class="pure-button button-secondary button-xsmall" >Notification debug logs</a>
            <br>
            <div id="notification-test-log" style="display: none;"><span class="pure-form-message-inline">Processing..</span></div>
        </div>
    </div>
    <div id="notification-customisation" class="pure-control-group">
@@ -38,7 +40,7 @@
    </div>
    <div class="pure-control-group">
        {{ render_field(form.notification_body , rows=5, class="notification-body", placeholder=settings_application['notification_body']) }}
        <span class="pure-form-message-inline">Body for all notifications ‐ You can use <a target="_new" href="https://jinja.palletsprojects.com/en/3.0.x/templates/">Jinja2</a> templating in the notification title, body and URL, and tokens from below.
        <span class="pure-form-message-inline">Body for all notifications ‐ You can use <a target="newwindow" href="https://jinja.palletsprojects.com/en/3.0.x/templates/">Jinja2</a> templating in the notification title, body and URL, and tokens from below.
        </span>

    </div>
@@ -124,7 +126,7 @@
    <div class="pure-form-message-inline">
        <p>
            Warning: Contents of <code>{{ '{{diff}}' }}</code>, <code>{{ '{{diff_removed}}' }}</code>, and <code>{{ '{{diff_added}}' }}</code> depend on how the difference algorithm perceives the change. <br>
            For example, an addition or removal could be perceived as a change in some cases. <a target="_new" href="https://github.com/dgtlmoon/changedetection.io/wiki/Using-the-%7B%7Bdiff%7D%7D,-%7B%7Bdiff_added%7D%7D,-and-%7B%7Bdiff_removed%7D%7D-notification-tokens">More Here</a> <br>
            For example, an addition or removal could be perceived as a change in some cases. <a target="newwindow" href="https://github.com/dgtlmoon/changedetection.io/wiki/Using-the-%7B%7Bdiff%7D%7D,-%7B%7Bdiff_added%7D%7D,-and-%7B%7Bdiff_removed%7D%7D-notification-tokens">More Here</a> <br>
        </p>
        <p>
            For JSON payloads, use <strong>|tojson</strong> without quotes for automatic escaping, for example - <code>{ "name": {{ '{{ watch_title|tojson }}' }} }</code>
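To make the |tojson tip above concrete, a small self-contained example of what it renders (the title value is illustrative):

from jinja2 import Environment

template = Environment().from_string('{ "name": {{ watch_title|tojson }} }')
# |tojson quotes and escapes the value, so the rendered body stays valid JSON
print(template.render(watch_title='My "watched" page'))
# { "name": "My \"watched\" page" }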
@@ -61,6 +61,18 @@
    {{ field(**kwargs)|safe }}
{% endmacro %}

{% macro playwright_warning() %}
    <p><strong>Error - Playwright support for Chrome based fetching is not enabled.</strong> Alternatively try our <a href="https://changedetection.io">very affordable subscription based service which has all this setup for you</a>.</p>
    <p>You may need to <a href="https://github.com/dgtlmoon/changedetection.io/blob/09ebc6ec6338545bdd694dc6eee57f2e9d2b8075/docker-compose.yml#L31">Enable playwright environment variable</a> and uncomment the <strong>sockpuppetbrowser</strong> in the <a href="https://github.com/dgtlmoon/changedetection.io/blob/master/docker-compose.yml">docker-compose.yml</a> file.</p>
    <br>
    <p>(Also Selenium/WebDriver can not extract full page screenshots reliably so Playwright is recommended here)</p>

{% endmacro %}

{% macro only_webdriver_type_watches_warning() %}
    <p><strong>Sorry, this functionality only works with Playwright/Chrome enabled watches.<br>You need to <a href="#request">Set the fetch method to Playwright/Chrome mode and resave</a> and have the Playwright connection enabled.</strong></p><br>
{% endmacro %}

{% macro render_time_schedule_form(form, available_timezones, timezone_default_config) %}
    <style>
        .day-schedule *, .day-schedule select {

@@ -159,7 +159,7 @@
    <a id="chrome-extension-link"
       title="Try our new Chrome Extension!"
       href="https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop">
        <img src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}">
        <img alt="Chrome store icon" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}">
        Chrome Webstore
    </a>
</p>

@@ -1,6 +1,6 @@
{% extends 'base.html' %}
{% block content %}
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form %}
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_webdriver_type_watches_warning %}
{% from '_common_fields.html' import render_common_settings_form %}
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script>

@@ -40,14 +40,13 @@
<div class="tabs collapsable">
    <ul>
        <li class="tab" id=""><a href="#general">General</a></li>
        <li class="tab"><a href="#general">General</a></li>
        <li class="tab"><a href="#request">Request</a></li>
        {% if extra_tab_content %}
        <li class="tab"><a href="#extras_tab">{{ extra_tab_content }}</a></li>
        {% endif %}
        {% if playwright_enabled %}
        <li class="tab"><a id="browsersteps-tab" href="#browser-steps">Browser Steps</a></li>
        {% endif %}
        <!-- should goto extra forms? -->
        {% if watch['processor'] == 'text_json_diff' %}
        <li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li>
        <li class="tab" id="filters-and-triggers-tab"><a href="#filters-and-triggers">Filters & Triggers</a></li>

@@ -59,7 +58,7 @@
<div class="box-wrap inner">
    <form class="pure-form pure-form-stacked"
          action="{{ url_for('edit_page', uuid=uuid, next = request.args.get('next'), unpause_on_save = request.args.get('unpause_on_save')) }}" method="POST">
          action="{{ url_for('edit_page', uuid=uuid, next = request.args.get('next'), unpause_on_save = request.args.get('unpause_on_save'), tag = request.args.get('tag')) }}" method="POST">
        <input type="hidden" name="csrf_token" value="{{ csrf_token() }}">

        <div class="tab-pane-inner" id="general">

@@ -199,8 +198,9 @@ Math: {{ 1 + 1 }}") }}
            </div>
        </fieldset>
    </div>
    {% if playwright_enabled %}

    <div class="tab-pane-inner" id="browser-steps">
        {% if playwright_enabled and watch_uses_webdriver %}
        <img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}" alt="New beta functionality">
        <fieldset>
            <div class="pure-control-group">

@@ -224,7 +224,7 @@ Math: {{ 1 + 1 }}") }}
                <span class="loader" >
                    <span id="browsersteps-click-start">
                        <h2 >Click here to Start</h2>
                        <svg style="height: 3.5rem;" version="1.1" viewBox="0 0 32 32" xml:space="preserve" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"><g id="Layer_1"/><g id="play_x5F_alt"><path d="M16,0C7.164,0,0,7.164,0,16s7.164,16,16,16s16-7.164,16-16S24.836,0,16,0z M10,24V8l16.008,8L10,24z" style="fill: var(--color-grey-400);"/></g></svg><br>
                        <svg style="height: 3.5rem;" version="1.1" viewBox="0 0 32 32" xml:space="preserve" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"><g id="start"/><g id="play_x5F_alt"><path d="M16,0C7.164,0,0,7.164,0,16s7.164,16,16,16s16-7.164,16-16S24.836,0,16,0z M10,24V8l16.008,8L10,24z" style="fill: var(--color-grey-400);"/></g></svg><br>
                        Please allow 10-15 seconds for the browser to connect.<br>
                    </span>
                    <div class="spinner" style="display: none;"></div>

@@ -234,21 +234,31 @@ Math: {{ 1 + 1 }}") }}
            </div>
        </div>
        <div id="browser-steps-fieldlist" >
            <span id="browser-seconds-remaining">Loading</span> <span style="font-size: 80%;"> (<a target=_new href="https://github.com/dgtlmoon/changedetection.io/pull/478/files#diff-1a79d924d1840c485238e66772391268a89c95b781d69091384cf1ea1ac146c9R4">?</a>) </span>
            <span id="browser-seconds-remaining">Loading</span> <span style="font-size: 80%;"> (<a target="newwindow" href="https://github.com/dgtlmoon/changedetection.io/pull/478/files#diff-1a79d924d1840c485238e66772391268a89c95b781d69091384cf1ea1ac146c9R4">?</a>) </span>
            {{ render_field(form.browser_steps) }}
        </div>
    </div>
</div>
</fieldset>
{% else %}
<span class="pure-form-message-inline">
    {% if not watch_uses_webdriver %}
        {{ only_webdriver_type_watches_warning() }}
    {% endif %}
    {% if not playwright_enabled %}
        {{ playwright_warning() }}
    {% endif %}
</span>
{% endif %}
</div>
{% endif %}


<div class="tab-pane-inner" id="notifications">
    <fieldset>
        <div class="pure-control-group inline-radio">
            {{ render_checkbox_field(form.notification_muted) }}
        </div>
        {% if is_html_webdriver %}
        {% if watch_uses_webdriver %}
        <div class="pure-control-group inline-radio">
            {{ render_checkbox_field(form.notification_screenshot) }}
            <span class="pure-form-message-inline">

@@ -298,7 +308,7 @@ xpath://body/div/span[contains(@class, 'example-class')]",
        <span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the <element> contains <![CDATA[]]></strong></span><br>
        {% endif %}
        <span class="pure-form-message-inline">One CSS, xPath, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br>
        <p><div data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div><br></p>
        <span data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</span><br>
        <ul id="advanced-help-selectors" style="display: none;">
            <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
            <li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).

@@ -471,7 +481,7 @@ keyword") }}

<fieldset>
    <div class="pure-control-group">
        {% if visualselector_enabled %}
        {% if playwright_enabled and watch_uses_webdriver %}
        <span class="pure-form-message-inline" id="visual-selector-heading">
            The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection. It automatically fills-in the filters in the "CSS/JSONPath/JQ/XPath Filters" box of the <a href="#filters-and-triggers">Filters & Triggers</a> tab. Use <strong>Shift+Click</strong> to select multiple items.
        </span>

@@ -489,11 +499,12 @@ keyword") }}
        </div>
        <div id="selector-current-xpath" style="overflow-x: hidden"><strong>Currently:</strong> <span class="text">Loading...</span></div>
        {% else %}
        <span class="pure-form-message-inline">
            <p>Sorry, this functionality only works with Playwright/Chrome enabled watches.</p>
            <p>Enable the Playwright Chrome fetcher, or alternatively try our <a href="https://lemonade.changedetection.io/start">very affordable subscription based service</a>.</p>
            <p>This is because Selenium/WebDriver can not extract full page screenshots reliably.</p>
        </span>
        {% if not watch_uses_webdriver %}
            {{ only_webdriver_type_watches_warning() }}
        {% endif %}
        {% if not playwright_enabled %}
            {{ playwright_warning() }}
        {% endif %}
        {% endif %}
    </div>
</fieldset>

@@ -214,7 +214,7 @@ nav
    <a id="chrome-extension-link"
       title="Try our new Chrome Extension!"
       href="https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop">
        <img src="{{ url_for('static_content', group='images', filename='Google-Chrome-icon.png') }}" alt="Chrome">
        <img alt="Chrome store icon" src="{{ url_for('static_content', group='images', filename='Google-Chrome-icon.png') }}" alt="Chrome">
        Chrome Webstore
    </a>
</p>

@@ -280,9 +280,7 @@ nav

    </div>
</div>
<p>
    Your proxy provider may need to whitelist our IP of <code>204.15.192.195</code>
</p>

<p><strong>Tip</strong>: "Residential" and "Mobile" proxy type can be more successful than "Data Center" for blocked websites.

<div class="pure-control-group" id="extra-proxies-setting">

@@ -1 +1 @@
<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 122.879 119.799" enable-background="new 0 0 122.879 119.799" xml:space="preserve"><g><path d="M49.988,0h0.016v0.007C63.803,0.011,76.298,5.608,85.34,14.652c9.027,9.031,14.619,21.515,14.628,35.303h0.007v0.033v0.04 h-0.007c-0.005,5.557-0.917,10.905-2.594,15.892c-0.281,0.837-0.575,1.641-0.877,2.409v0.007c-1.446,3.66-3.315,7.12-5.547,10.307 l29.082,26.139l0.018,0.016l0.157,0.146l0.011,0.011c1.642,1.563,2.536,3.656,2.649,5.78c0.11,2.1-0.543,4.248-1.979,5.971 l-0.011,0.016l-0.175,0.203l-0.035,0.035l-0.146,0.16l-0.016,0.021c-1.565,1.642-3.654,2.534-5.78,2.646 c-2.097,0.111-4.247-0.54-5.971-1.978l-0.015-0.011l-0.204-0.175l-0.029-0.024L78.761,90.865c-0.88,0.62-1.778,1.209-2.687,1.765 c-1.233,0.755-2.51,1.466-3.813,2.115c-6.699,3.342-14.269,5.222-22.272,5.222v0.007h-0.016v-0.007 c-13.799-0.004-26.296-5.601-35.338-14.645C5.605,76.291,0.016,63.805,0.007,50.021H0v-0.033v-0.016h0.007 c0.004-13.799,5.601-26.296,14.645-35.338C23.683,5.608,36.167,0.016,49.955,0.007V0H49.988L49.988,0z M50.004,11.21v0.007h-0.016 h-0.033V11.21c-10.686,0.007-20.372,4.35-27.384,11.359C15.56,29.578,11.213,39.274,11.21,49.973h0.007v0.016v0.033H11.21 c0.007,10.686,4.347,20.367,11.359,27.381c7.009,7.012,16.705,11.359,27.403,11.361v-0.007h0.016h0.033v0.007 c10.686-0.007,20.368-4.348,27.382-11.359c7.011-7.009,11.358-16.702,11.36-27.4h-0.006v-0.016v-0.033h0.006 c-0.006-10.686-4.35-20.372-11.358-27.384C70.396,15.56,60.703,11.213,50.004,11.21L50.004,11.21z"/></g></svg>
<svg version="1.1" id="search" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 122.879 119.799" enable-background="new 0 0 122.879 119.799" xml:space="preserve"><g><path d="M49.988,0h0.016v0.007C63.803,0.011,76.298,5.608,85.34,14.652c9.027,9.031,14.619,21.515,14.628,35.303h0.007v0.033v0.04 h-0.007c-0.005,5.557-0.917,10.905-2.594,15.892c-0.281,0.837-0.575,1.641-0.877,2.409v0.007c-1.446,3.66-3.315,7.12-5.547,10.307 l29.082,26.139l0.018,0.016l0.157,0.146l0.011,0.011c1.642,1.563,2.536,3.656,2.649,5.78c0.11,2.1-0.543,4.248-1.979,5.971 l-0.011,0.016l-0.175,0.203l-0.035,0.035l-0.146,0.16l-0.016,0.021c-1.565,1.642-3.654,2.534-5.78,2.646 c-2.097,0.111-4.247-0.54-5.971-1.978l-0.015-0.011l-0.204-0.175l-0.029-0.024L78.761,90.865c-0.88,0.62-1.778,1.209-2.687,1.765 c-1.233,0.755-2.51,1.466-3.813,2.115c-6.699,3.342-14.269,5.222-22.272,5.222v0.007h-0.016v-0.007 c-13.799-0.004-26.296-5.601-35.338-14.645C5.605,76.291,0.016,63.805,0.007,50.021H0v-0.033v-0.016h0.007 c0.004-13.799,5.601-26.296,14.645-35.338C23.683,5.608,36.167,0.016,49.955,0.007V0H49.988L49.988,0z M50.004,11.21v0.007h-0.016 h-0.033V11.21c-10.686,0.007-20.372,4.35-27.384,11.359C15.56,29.578,11.213,39.274,11.21,49.973h0.007v0.016v0.033H11.21 c0.007,10.686,4.347,20.367,11.359,27.381c7.009,7.012,16.705,11.359,27.403,11.361v-0.007h0.016h0.033v0.007 c10.686-0.007,20.368-4.348,27.382-11.359c7.011-7.009,11.358-16.702,11.36-27.4h-0.006v-0.016v-0.033h0.006 c-0.006-10.686-4.35-20.372-11.358-27.384C70.396,15.56,60.703,11.213,50.004,11.21L50.004,11.21z"/></g></svg>

Before Width: | Height: | Size: 1.5 KiB  After Width: | Height: | Size: 1.5 KiB
@@ -6,7 +6,7 @@

<div class="box">

    <form class="pure-form" action="{{ url_for('form_quick_watch_add') }}" method="POST" id="new-watch-form">
    <form class="pure-form" action="{{ url_for('form_quick_watch_add', tag=active_tag_uuid) }}" method="POST" id="new-watch-form">
        <input type="hidden" name="csrf_token" value="{{ csrf_token() }}" >
        <fieldset>
            <legend>Add a new change detection watch</legend>

@@ -108,7 +108,8 @@
            {% else %}
            <a class="state-on" href="{{url_for('index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='play.svg')}}" alt="UnPause checks" title="UnPause checks" class="icon icon-unpause" ></a>
            {% endif %}
            <a class="link-mute state-{{'on' if watch.notification_muted else 'off'}}" href="{{url_for('index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notifications" title="Mute notifications" class="icon icon-mute" ></a>
            {% set mute_label = 'UnMute notification' if watch.notification_muted else 'Mute notification' %}
            <a class="link-mute state-{{'on' if watch.notification_muted else 'off'}}" href="{{url_for('index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="{{ mute_label }}" title="{{ mute_label }}" class="icon icon-mute" ></a>
        </td>
        <td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}}
            <a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"></a>

@@ -118,7 +119,7 @@
            or ( watch.get_fetch_backend == "system" and system_default_fetcher == 'html_webdriver' )
            or "extra_browser_" in watch.get_fetch_backend
            %}
            <img class="status-icon" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" title="Using a Chrome browser" >
            <img class="status-icon" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" alt="Using a Chrome browser" title="Using a Chrome browser" >
            {% endif %}

            {%if watch.is_pdf %}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" title="Converting PDF to text" >{% endif %}

@@ -187,11 +188,11 @@
        <td>
            <a {% if watch.uuid in queued_uuids %}disabled="true"{% endif %} href="{{ url_for('form_watch_checknow', uuid=watch.uuid, tag=request.args.get('tag')) }}"
               class="recheck pure-button pure-button-primary">{% if watch.uuid in queued_uuids %}Queued{% else %}Recheck{% endif %}</a>
            <a href="{{ url_for('edit_page', uuid=watch.uuid)}}#general" class="pure-button pure-button-primary">Edit</a>
            <a href="{{ url_for('edit_page', uuid=watch.uuid, tag=active_tag_uuid)}}#general" class="pure-button pure-button-primary">Edit</a>
            {% if watch.history_n >= 2 %}

            {% if is_unviewed %}
            <a href="{{ url_for('diff_history_page', uuid=watch.uuid, from_version=watch.get_next_snapshot_key_to_last_viewed) }}" target="{{watch.uuid}}" class="pure-button pure-button-primary diff-link">History</a>
            <a href="{{ url_for('diff_history_page', uuid=watch.uuid, from_version=watch.get_from_version_based_on_last_viewed) }}" target="{{watch.uuid}}" class="pure-button pure-button-primary diff-link">History</a>
            {% else %}
            <a href="{{ url_for('diff_history_page', uuid=watch.uuid)}}" target="{{watch.uuid}}" class="pure-button pure-button-primary diff-link">History</a>
            {% endif %}
@@ -34,7 +34,7 @@ def test_execute_custom_js(client, live_server, measure_memory_usage):
    assert b"unpaused" in res.data
    wait_for_all_checks(client)

    uuid = extract_UUID_from_client(client)
    uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
    assert live_server.app.config['DATASTORE'].data['watching'][uuid].history_n >= 1, "Watch history had atleast 1 (everything fetched OK)"

    assert b"This text should be removed" not in res.data
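This swap of extract_UUID_from_client() for next(iter(...)) recurs through the rest of these test diffs. The equivalence only holds while the test datastore contains a single watch; a sketch of what the new form does under that assumption:

# 'watching' is a dict keyed by watch UUID; with exactly one watch in the
# test datastore, its first key *is* that watch's UUID - no HTML scraping needed
datastore = live_server.app.config['DATASTORE']
uuid = next(iter(datastore.data['watching']))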
@@ -48,7 +48,7 @@ def test_noproxy_option(client, live_server, measure_memory_usage):
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Watch added in Paused state, saving will unpause" in res.data
|
||||
uuid = extract_UUID_from_client(client)
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
res = client.get(
|
||||
url_for("edit_page", uuid=uuid, unpause_on_save=1))
|
||||
assert b'No proxy' in res.data
|
||||
|
||||
@@ -81,7 +81,7 @@ def test_socks5(client, live_server, measure_memory_usage):
|
||||
assert "Awesome, you made it".encode('utf-8') in res.data
|
||||
|
||||
# PROXY CHECKER WIDGET CHECK - this needs more checking
|
||||
uuid = extract_UUID_from_client(client)
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
|
||||
res = client.get(
|
||||
url_for("check_proxies.start_check", uuid=uuid),
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os.path
|
||||
import time
|
||||
|
||||
from flask import url_for
|
||||
from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output
|
||||
from changedetectionio import html_tools
|
||||
|
||||
|
||||
def set_original(excluding=None, add_line=None):
|
||||
@@ -113,7 +113,8 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
|
||||
res = client.post(
|
||||
url_for("settings_page"),
|
||||
data={"application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }}",
|
||||
"application-notification_body": 'triggered text was -{{triggered_text}}- 网站监测 内容更新了',
|
||||
# triggered_text will contain multiple lines
|
||||
"application-notification_body": 'triggered text was -{{triggered_text}}- ### 网站监测 内容更新了 ####',
|
||||
# https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
|
||||
"application-notification_urls": test_notification_url,
|
||||
"application-minutes_between_check": 180,
|
||||
@@ -171,7 +172,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
|
||||
assert os.path.isfile("test-datastore/notification.txt"), "Notification fired because I can see the output file"
|
||||
with open("test-datastore/notification.txt", 'rb') as f:
|
||||
response = f.read()
|
||||
assert b'-Oh yes please-' in response
|
||||
assert b'-Oh yes please' in response
|
||||
assert '网站监测 内容更新了'.encode('utf-8') in response
|
||||
|
||||
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||
|
||||
@@ -44,7 +44,6 @@ def set_modified_response():
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def is_valid_uuid(val):
|
||||
try:
|
||||
uuid.UUID(str(val))
|
||||
@@ -56,8 +55,9 @@ def is_valid_uuid(val):
|
||||
def test_setup(client, live_server, measure_memory_usage):
|
||||
live_server_setup(live_server)
|
||||
|
||||
|
||||
def test_api_simple(client, live_server, measure_memory_usage):
|
||||
#live_server_setup(live_server)
|
||||
# live_server_setup(live_server)
|
||||
|
||||
api_key = extract_api_key_from_UI(client)
|
||||
|
||||
@@ -129,6 +129,9 @@ def test_api_simple(client, live_server, measure_memory_usage):
|
||||
assert after_recheck_info['last_checked'] != before_recheck_info['last_checked']
|
||||
assert after_recheck_info['last_changed'] != 0
|
||||
|
||||
# #2877 When run in a slow fetcher like playwright etc
|
||||
assert after_recheck_info['last_changed'] == after_recheck_info['last_checked']
|
||||
|
||||
# Check history index list
|
||||
res = client.get(
|
||||
url_for("watchhistory", uuid=watch_uuid),
|
||||
|
||||
@@ -99,7 +99,7 @@ def test_check_ldjson_price_autodetect(client, live_server, measure_memory_usage
|
||||
assert b'ldjson-price-track-offer' in res.data
|
||||
|
||||
# Accept it
|
||||
uuid = extract_UUID_from_client(client)
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
#time.sleep(1)
|
||||
client.get(url_for('price_data_follower.accept', uuid=uuid, follow_redirects=True))
|
||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
|
||||
import time
|
||||
from flask import url_for
|
||||
from urllib.request import urlopen
|
||||
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, \
|
||||
extract_UUID_from_client
|
||||
|
||||
@@ -69,7 +68,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
|
||||
|
||||
wait_for_all_checks(client)
|
||||
|
||||
uuid = extract_UUID_from_client(client)
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
|
||||
# Check the 'get latest snapshot works'
|
||||
res = client.get(url_for("watch_get_latest_html", uuid=uuid))
|
||||
|
||||
@@ -40,7 +40,7 @@ def test_check_encoding_detection(client, live_server, measure_memory_usage):
|
||||
|
||||
|
||||
# Content type recording worked
|
||||
uuid = extract_UUID_from_client(client)
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
assert live_server.app.config['DATASTORE'].data['watching'][uuid]['content-type'] == "text/html"
|
||||
|
||||
res = client.get(
|
||||
|
||||
@@ -51,7 +51,7 @@ def run_filter_test(client, live_server, content_filter):
|
||||
assert b"1 Imported" in res.data
|
||||
wait_for_all_checks(client)
|
||||
|
||||
uuid = extract_UUID_from_client(client)
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
|
||||
assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 0, "No filter = No filter failure"
|
||||
|
||||
|
||||
@@ -288,7 +288,7 @@ def test_clone_tag_on_import(client, live_server, measure_memory_usage):
|
||||
assert b'test-tag' in res.data
|
||||
assert b'another-tag' in res.data
|
||||
|
||||
watch_uuid = extract_UUID_from_client(client)
|
||||
watch_uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
res = client.get(url_for("form_clone", uuid=watch_uuid), follow_redirects=True)
|
||||
|
||||
assert b'Cloned' in res.data
|
||||
@@ -315,7 +315,7 @@ def test_clone_tag_on_quickwatchform_add(client, live_server, measure_memory_usa
|
||||
assert b'test-tag' in res.data
|
||||
assert b'another-tag' in res.data
|
||||
|
||||
watch_uuid = extract_UUID_from_client(client)
|
||||
watch_uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
res = client.get(url_for("form_clone", uuid=watch_uuid), follow_redirects=True)
|
||||
|
||||
assert b'Cloned' in res.data
|
||||
|
||||
@@ -36,7 +36,7 @@ def test_ignore(client, live_server, measure_memory_usage):
|
||||
|
||||
# Give the thread time to pick it up
|
||||
wait_for_all_checks(client)
|
||||
uuid = extract_UUID_from_client(client)
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
# use the highlighter endpoint
|
||||
res = client.post(
|
||||
url_for("highlight_submit_ignore_url", uuid=uuid),
|
||||
|
||||
@@ -514,3 +514,15 @@ def test_check_jq_ext_filter(client, live_server, measure_memory_usage):
|
||||
def test_check_jqraw_ext_filter(client, live_server, measure_memory_usage):
|
||||
if jq_support:
|
||||
check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server)
|
||||
|
||||
def test_jsonpath_BOM_utf8(client, live_server, measure_memory_usage):
|
||||
from .. import html_tools
|
||||
|
||||
# JSON string with BOM and correct double-quoted keys
|
||||
json_str = '\ufeff{"name": "José", "emoji": "😊", "language": "中文", "greeting": "Привет"}'
|
||||
|
||||
# See that we can find the second <script> one, which is not broken, and matches our filter
|
||||
text = html_tools.extract_json_as_string(json_str, "json:$.name")
|
||||
assert text == '"José"'
|
||||
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@ def test_content_filter_live_preview(client, live_server, measure_memory_usage):
|
||||
data={"url": test_url, "tags": ''},
|
||||
follow_redirects=True
|
||||
)
|
||||
uuid = extract_UUID_from_client(client)
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
res = client.post(
|
||||
url_for("edit_page", uuid=uuid),
|
||||
data={
|
||||
|
||||
@@ -48,7 +48,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
|
||||
#####################
|
||||
client.post(
|
||||
url_for("settings_page"),
|
||||
data={"application-empty_pages_are_a_change": "",
|
||||
data={"application-empty_pages_are_a_change": "", # default, OFF, they are NOT a change
|
||||
"requests-time_between_check-minutes": 180,
|
||||
'application-fetch_backend': "html_requests"},
|
||||
follow_redirects=True
|
||||
@@ -66,6 +66,14 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
|
||||
res = client.get(url_for("index"))
|
||||
assert b'unviewed' not in res.data
|
||||
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
|
||||
|
||||
assert watch.last_changed == 0
|
||||
assert watch['last_checked'] != 0
|
||||
|
||||
|
||||
|
||||
|
||||
# ok now do the opposite
|
||||
|
||||
@@ -92,6 +100,10 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
|
||||
# A totally zero byte (#2528) response should also not trigger an error
|
||||
set_zero_byte_response()
|
||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
|
||||
# 2877
|
||||
assert watch.last_changed == watch['last_checked']
|
||||
|
||||
wait_for_all_checks(client)
|
||||
res = client.get(url_for("index"))
|
||||
assert b'unviewed' in res.data # A change should have registered because empty_pages_are_a_change is ON
|
||||
|
||||
@@ -6,7 +6,7 @@ from flask import url_for
|
||||
from loguru import logger
|
||||
|
||||
from .util import set_original_response, set_modified_response, set_more_modified_response, live_server_setup, wait_for_all_checks, \
|
||||
set_longer_modified_response
|
||||
set_longer_modified_response, get_index
|
||||
from . util import extract_UUID_from_client
|
||||
import logging
|
||||
import base64
|
||||
@@ -29,7 +29,7 @@ def test_check_notification(client, live_server, measure_memory_usage):
|
||||
|
||||
# Re 360 - new install should have defaults set
|
||||
res = client.get(url_for("settings_page"))
|
||||
notification_url = url_for('test_notification_endpoint', _external=True).replace('http', 'json')
|
||||
notification_url = url_for('test_notification_endpoint', _external=True).replace('http', 'json')+"?status_code=204"
|
||||
|
||||
assert default_notification_body.encode() in res.data
|
||||
assert default_notification_title.encode() in res.data
|
||||
@@ -76,7 +76,7 @@ def test_check_notification(client, live_server, measure_memory_usage):
|
||||
testimage_png = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII='
|
||||
|
||||
|
||||
uuid = extract_UUID_from_client(client)
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
datastore = 'test-datastore'
|
||||
with open(os.path.join(datastore, str(uuid), 'last-screenshot.png'), 'wb') as f:
|
||||
f.write(base64.b64decode(testimage_png))
|
||||
@@ -135,7 +135,14 @@ def test_check_notification(client, live_server, measure_memory_usage):

     # Trigger a check
     client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    wait_for_all_checks(client)
+    time.sleep(3)
+
+    # Check no errors were recorded
+    res = client.get(url_for("index"))
+    assert b'notification-error' not in res.data
+

     # Verify what was sent as a notification, this file should exist
     with open("test-datastore/notification.txt", "r") as f:
         notification_submission = f.read()
@@ -284,7 +291,7 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_me
     # CUSTOM JSON BODY CHECK for POST://
     set_original_response()
     # https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#header-manipulation
-    test_notification_url = url_for('test_notification_endpoint', _external=True).replace('http://', 'post://')+"?xxx={{ watch_url }}&+custom-header=123&+second=hello+world%20%22space%22"
+    test_notification_url = url_for('test_notification_endpoint', _external=True).replace('http://', 'post://')+"?status_code=204&xxx={{ watch_url }}&+custom-header=123&+second=hello+world%20%22space%22"

     res = client.post(
         url_for("settings_page"),
@@ -319,6 +326,11 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_me

     time.sleep(2)  # plus extra delay for notifications to fire

+
+    # Check no errors were recorded, because we asked for 204 which is slightly uncommon but is still OK
+    res = get_index(client)
+    assert b'notification-error' not in res.data
+
     with open("test-datastore/notification.txt", 'r') as f:
         x = f.read()
     j = json.loads(x)
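The "?status_code=204" suffix used on these notification URLs drives the test endpoint (see the util.py hunk further down) to reply with an unusual-but-valid success code. A hedged sketch of the check the tests rely on, with an illustrative helper name: any 2xx reply from the notification target should not surface as a 'notification-error'.

def is_notification_error(status_code: int) -> bool:
    # Anything outside the 2xx success range is treated as a delivery failure
    return not (200 <= status_code < 300)

assert not is_notification_error(200)
assert not is_notification_error(204)  # No Content is still success
assert is_notification_error(404)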
@@ -360,7 +372,10 @@ def test_global_send_test_notification(client, live_server, measure_memory_usage
     #live_server_setup(live_server)
     set_original_response()
     if os.path.isfile("test-datastore/notification.txt"):
-        os.unlink("test-datastore/notification.txt")
+        os.unlink("test-datastore/notification.txt") \

+    # 1995 UTF-8 content should be encoded
+    test_body = 'change detection is cool 网站监测 内容更新了'

     # otherwise other settings would have already existed from previous tests in this file
     res = client.post(
@@ -368,8 +383,7 @@ def test_global_send_test_notification(client, live_server, measure_memory_usage
         data={
             "application-fetch_backend": "html_requests",
             "application-minutes_between_check": 180,
-            #1995 UTF-8 content should be encoded
-            "application-notification_body": 'change detection is cool 网站监测 内容更新了',
+            "application-notification_body": test_body,
             "application-notification_format": default_notification_format,
             "application-notification_urls": "",
             "application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }}",
@@ -399,12 +413,10 @@ def test_global_send_test_notification(client, live_server, measure_memory_usage
     assert res.status_code != 400
     assert res.status_code != 500

-    # Give apprise time to fire
-    time.sleep(4)
-
     with open("test-datastore/notification.txt", 'r') as f:
         x = f.read()
-    assert 'change detection is cool 网站监测 内容更新了' in x
+    assert test_body in x

     os.unlink("test-datastore/notification.txt")
@@ -442,4 +454,67 @@ def test_global_send_test_notification(client, live_server, measure_memory_usage
     assert b"Error: You must have atleast one watch configured for 'test notification' to work" in res.data


+def _test_color_notifications(client, notification_body_token):
+
+    from changedetectionio.diff import ADDED_STYLE, REMOVED_STYLE
+
+    set_original_response()
+
+    if os.path.isfile("test-datastore/notification.txt"):
+        os.unlink("test-datastore/notification.txt")
+
+    test_notification_url = url_for('test_notification_endpoint', _external=True).replace('http://', 'post://')+"?xxx={{ watch_url }}&+custom-header=123"
+
+    # otherwise other settings would have already existed from previous tests in this file
+    res = client.post(
+        url_for("settings_page"),
+        data={
+            "application-fetch_backend": "html_requests",
+            "application-minutes_between_check": 180,
+            "application-notification_body": notification_body_token,
+            "application-notification_format": "HTML Color",
+            "application-notification_urls": test_notification_url,
+            "application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }}",
+        },
+        follow_redirects=True
+    )
+    assert b'Settings updated' in res.data
+
+    test_url = url_for('test_endpoint', _external=True)
+    res = client.post(
+        url_for("form_quick_watch_add"),
+        data={"url": test_url, "tags": 'nice one'},
+        follow_redirects=True
+    )
+
+    assert b"Watch added" in res.data
+
+    wait_for_all_checks(client)
+
+    set_modified_response()
+
+    res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    assert b'1 watches queued for rechecking.' in res.data
+
+    wait_for_all_checks(client)
+    time.sleep(3)
+
+    with open("test-datastore/notification.txt", 'r') as f:
+        x = f.read()
+        assert f'<span style="{REMOVED_STYLE}">Which is across multiple lines' in x
+
+    client.get(
+        url_for("form_delete", uuid="all"),
+        follow_redirects=True
+    )
+
+def test_html_color_notifications(client, live_server, measure_memory_usage):
+
+    #live_server_setup(live_server)
+    _test_color_notifications(client, '{{diff}}')
+    _test_color_notifications(client, '{{diff_full}}')
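A hedged sketch of what the REMOVED_STYLE assertion above implies about the 'HTML Color' notification format: removed diff lines get wrapped in an inline-styled span (and, presumably, added lines likewise with ADDED_STYLE). The style strings below are placeholders, not the project's real values.

REMOVED_STYLE = "background-color: #fadad7; color: #b30000;"  # assumed example value
ADDED_STYLE = "background-color: #eaf2c2; color: #406619;"    # assumed example value

def colourise_diff_line(line: str, removed: bool) -> str:
    # Wrap a single diff line in an inline-styled span, as the assertion expects
    style = REMOVED_STYLE if removed else ADDED_STYLE
    return f'<span style="{style}">{line}</span>'

print(colourise_diff_line("Which is across multiple lines", removed=True))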
@@ -373,13 +373,14 @@ def test_headers_textfile_in_request(client, live_server, measure_memory_usage):
     wait_for_all_checks(client)

     with open('test-datastore/headers-testtag.txt', 'w') as f:
-        f.write("tag-header: test")
+        f.write("tag-header: test\r\nurl-header: http://example.com")

     with open('test-datastore/headers.txt', 'w') as f:
-        f.write("global-header: nice\r\nnext-global-header: nice")
+        f.write("global-header: nice\r\nnext-global-header: nice\r\nurl-header-global: http://example.com/global")

-    with open('test-datastore/' + extract_UUID_from_client(client) + '/headers.txt', 'w') as f:
-        f.write("watch-header: nice")
+    uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
+    with open(f'test-datastore/{uuid}/headers.txt', 'w') as f:
+        f.write("watch-header: nice\r\nurl-header-watch: http://example.com/watch")

     wait_for_all_checks(client)
     client.get(url_for("form_watch_checknow"), follow_redirects=True)
@@ -410,6 +411,9 @@ def test_headers_textfile_in_request(client, live_server, measure_memory_usage):
     assert b"Xxx:ooo" in res.data
     assert b"Watch-Header:nice" in res.data
     assert b"Tag-Header:test" in res.data
+    assert b"Url-Header:http://example.com" in res.data
+    assert b"Url-Header-Global:http://example.com/global" in res.data
+    assert b"Url-Header-Watch:http://example.com/watch" in res.data

     # Check the custom UA from system settings page made it through
     if os.getenv('PLAYWRIGHT_DRIVER_URL'):
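A hedged sketch of the layering these headers.txt files imply (the merge helper below is illustrative, not the project's actual code): headers.txt at the datastore root applies to every watch, headers-<tag>.txt applies per tag, and <watch-uuid>/headers.txt applies per watch, with the more specific layers overriding the general ones.

def merge_request_headers(global_headers: dict, tag_headers: dict, watch_headers: dict) -> dict:
    # Later (more specific) layers win on key collisions
    merged = {}
    for layer in (global_headers, tag_headers, watch_headers):
        merged.update(layer)
    return merged

print(merge_request_headers(
    {"global-header": "nice", "url-header-global": "http://example.com/global"},
    {"tag-header": "test"},
    {"watch-header": "nice", "url-header-watch": "http://example.com/watch"},
))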
@@ -189,6 +189,17 @@ def _run_test_minmax_limit(client, extra_watch_edit_form):

     client.get(url_for("mark_all_viewed"))

+
+    # 2715 - Price detection (once it crosses the "lower" threshold) again with a lower price - should trigger again!
+    set_original_response(props_markup=instock_props[0], price='820.45')
+    res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    assert b'1 watches queued for rechecking.' in res.data
+    wait_for_all_checks(client)
+    res = client.get(url_for("index"))
+    assert b'820.45' in res.data
+    assert b'unviewed' in res.data
+    client.get(url_for("mark_all_viewed"))
+
     # price changed to something MORE than max (1100.10), SHOULD be a change
     set_original_response(props_markup=instock_props[0], price='1890.45')
     client.get(url_for("form_watch_checknow"), follow_redirects=True)
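A hedged sketch of the min/max window this test drives (illustrative helper; the band values are taken from the form data seen in this file, price_change_min=900.0 and max 1100.10): a new price only counts as a change once it falls outside the configured band, and per #2715 it should keep triggering on further moves beyond the threshold.

def price_outside_band(price: float, price_change_min: float, price_change_max: float) -> bool:
    # A change is significant only when the price escapes the [min, max] band
    return price < price_change_min or price > price_change_max

assert price_outside_band(820.45, 900.0, 1100.10)      # below min -> triggers
assert price_outside_band(1890.45, 900.0, 1100.10)     # above max -> triggers
assert not price_outside_band(950.00, 900.0, 1100.10)  # inside band -> ignored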
@@ -203,7 +214,7 @@ def _run_test_minmax_limit(client, extra_watch_edit_form):


 def test_restock_itemprop_minmax(client, live_server):
-    # live_server_setup(live_server)
+    #live_server_setup(live_server)
     extras = {
         "restock_settings-follow_price_changes": "y",
         "restock_settings-price_change_min": 900.0,
@@ -369,7 +380,7 @@ def test_change_with_notification_values(client, live_server):

     ## Now test the "SEND TEST NOTIFICATION" is working
     os.unlink("test-datastore/notification.txt")
-    uuid = extract_UUID_from_client(client)
+    uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
     res = client.post(url_for("ajax_callback_send_notification_test", watch_uuid=uuid), data={}, follow_redirects=True)
     time.sleep(5)
     assert os.path.isfile("test-datastore/notification.txt"), "Notification received"
@@ -132,7 +132,7 @@ def test_rss_xpath_filtering(client, live_server, measure_memory_usage):
     )
     assert b"Watch added in Paused state, saving will unpause" in res.data

-    uuid = extract_UUID_from_client(client)
+    uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
     res = client.post(
         url_for("edit_page", uuid=uuid, unpause_on_save=1),
         data={
@@ -39,7 +39,7 @@ def test_check_basic_scheduler_functionality(client, live_server, measure_memory

     assert b"1 Imported" in res.data
     wait_for_all_checks(client)
-    uuid = extract_UUID_from_client(client)
+    uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))

     # Setup all the days of the weeks using XXX as the placeholder for monday/tuesday/etc

@@ -104,7 +104,7 @@ def test_check_basic_global_scheduler_functionality(client, live_server, measure

     assert b"1 Imported" in res.data
     wait_for_all_checks(client)
-    uuid = extract_UUID_from_client(client)
+    uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))

     # Setup all the days of the weeks using XXX as the placeholder for monday/tuesday/etc
@@ -1,9 +1,7 @@
 import os
-
 from flask import url_for
-from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
 import time
-
+from .util import live_server_setup, wait_for_all_checks
 from .. import strtobool


@@ -61,54 +59,44 @@ def test_bad_access(client, live_server, measure_memory_usage):
     assert b'Watch protocol is not permitted by SAFE_PROTOCOL_REGEX' in res.data


-def test_file_slashslash_access(client, live_server, measure_memory_usage):
-    #live_server_setup(live_server)
+def _runner_test_various_file_slash(client, file_uri):

     test_file_path = os.path.abspath(__file__)

     # file:// is permitted by default, but it will be caught by ALLOW_FILE_URI
     client.post(
         url_for("form_quick_watch_add"),
-        data={"url": f"file://{test_file_path}", "tags": ''},
+        data={"url": file_uri, "tags": ''},
         follow_redirects=True
     )
     wait_for_all_checks(client)
     res = client.get(url_for("index"))

+    substrings = [b"URLs with hostname components are not permitted", b"No connection adapters were found for"]

     # If it is enabled at test time
     if strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
-        res = client.get(
-            url_for("preview_page", uuid="first"),
-            follow_redirects=True
-        )
+        if file_uri.startswith('file:///'):
+            # This one should be the full qualified path to the file and should get the contents of this file
+            res = client.get(
+                url_for("preview_page", uuid="first"),
+                follow_redirects=True
+            )
+            assert b'_runner_test_various_file_slash' in res.data
+        else:
+            # This will give some error from requests or if it went to chrome, will give some other error :-)
+            assert any(s in res.data for s in substrings)

-        assert b"test_file_slashslash_access" in res.data
     else:
         # Default should be here
         assert b'file:// type access is denied for security reasons.' in res.data
+    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data

 def test_file_slash_access(client, live_server, measure_memory_usage):
     #live_server_setup(live_server)

-    # file: is NOT permitted by default, so it will be caught by ALLOW_FILE_URI check
-
     test_file_path = os.path.abspath(__file__)

-    # file:// is permitted by default, but it will be caught by ALLOW_FILE_URI
-    client.post(
-        url_for("form_quick_watch_add"),
-        data={"url": f"file:/{test_file_path}", "tags": ''},
-        follow_redirects=True
-    )
-    wait_for_all_checks(client)
-    res = client.get(url_for("index"))
-
-    # If it is enabled at test time
-    if strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
-        # So it should permit it, but it should fall back to the 'requests' library giving an error
-        # (but means it gets passed to playwright etc)
-        assert b"URLs with hostname components are not permitted" in res.data
-    else:
-        # Default should be here
-        assert b'file:// type access is denied for security reasons.' in res.data
+    _runner_test_various_file_slash(client, file_uri=f"file://{test_file_path}")
+    _runner_test_various_file_slash(client, file_uri=f"file:/{test_file_path}")
+    _runner_test_various_file_slash(client, file_uri=f"file:{test_file_path}")  # CVE-2024-56509

 def test_xss(client, live_server, measure_memory_usage):
     #live_server_setup(live_server)
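A hedged sketch of the normalisation concern behind CVE-2024-56509, which the three runner calls above probe: a naive check for the literal 'file://' prefix misses the equally valid 'file:/path' and 'file:path' spellings, so a guard should match on the scheme itself. Illustrative helper, not the project's actual check:

import re

def is_file_scheme(url: str) -> bool:
    # Match the scheme, not a literal 'file://' prefix
    return bool(re.match(r'^file:', url.strip(), re.IGNORECASE))

for candidate in ("file:///etc/passwd", "file:/etc/passwd", "file:etc/passwd"):
    assert is_file_scheme(candidate)
assert not is_file_scheme("https://example.com")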
64
changedetectionio/tests/unit/test_semver.py
Normal file
@@ -0,0 +1,64 @@
+#!/usr/bin/env python3
+
+# run from dir above changedetectionio/ dir
+# python3 -m unittest changedetectionio.tests.unit.test_semver
+
+import re
+import unittest
+
+
+# The SEMVER regex
+SEMVER_REGEX = r"^(?P<major>0|[1-9]\d*)\.(?P<minor>0|[1-9]\d*)\.(?P<patch>0|[1-9]\d*)(?:-(?P<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$"
+
+# Compile the regex
+semver_pattern = re.compile(SEMVER_REGEX)
+
+class TestSemver(unittest.TestCase):
+    def test_valid_versions(self):
+        """Test valid semantic version strings"""
+        valid_versions = [
+            "1.0.0",
+            "0.1.0",
+            "0.0.1",
+            "1.0.0-alpha",
+            "1.0.0-alpha.1",
+            "1.0.0-0.3.7",
+            "1.0.0-x.7.z.92",
+            "1.0.0-alpha+001",
+            "1.0.0+20130313144700",
+            "1.0.0-beta+exp.sha.5114f85"
+        ]
+        for version in valid_versions:
+            with self.subTest(version=version):
+                self.assertIsNotNone(semver_pattern.match(version), f"Version {version} should be valid")
+
+    def test_invalid_versions(self):
+        """Test invalid semantic version strings"""
+        invalid_versions = [
+            "0.48.06",
+            "1.0",
+            "1.0.0-",
+            # Seems to pass the semver.org regex?
+            # "1.0.0-alpha-",
+            "1.0.0+",
+            "1.0.0-alpha+",
+            "1.0.0-",
+            "01.0.0",
+            "1.01.0",
+            "1.0.01",
+            ".1.0.0",
+            "1..0.0"
+        ]
+        for version in invalid_versions:
+            with self.subTest(version=version):
+                res = semver_pattern.match(version)
+                self.assertIsNone(res, f"Version '{version}' should be invalid")
+
+    def test_our_version(self):
+        from changedetectionio import get_version
+        our_version = get_version()
+        self.assertIsNotNone(semver_pattern.match(our_version), f"Our version '{our_version}' should be a valid SEMVER string")
+
+
+if __name__ == '__main__':
+    unittest.main()
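Note that "0.48.06" is listed as invalid because SemVer forbids leading zeros in the patch component, which is presumably why this unit test was added. The named groups in SEMVER_REGEX also make it easy to pull a version apart; a small illustrative demo using the same pattern as above:

import re

SEMVER_REGEX = r"^(?P<major>0|[1-9]\d*)\.(?P<minor>0|[1-9]\d*)\.(?P<patch>0|[1-9]\d*)(?:-(?P<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$"

m = re.match(SEMVER_REGEX, "1.0.0-beta+exp.sha.5114f85")
# Each component is available by name rather than by position
print(m.group('major'), m.group('minor'), m.group('patch'))  # 1 0 0
print(m.group('prerelease'))                                 # beta
print(m.group('buildmetadata'))                              # exp.sha.5114f85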
@@ -16,7 +16,6 @@ class TestDiffBuilder(unittest.TestCase):
         watch = Watch.model(datastore_path='/tmp', default={})
         watch.ensure_data_dir_exists()

-        watch['last_viewed'] = 110

         # Contents from the browser are always returned from the browser/requests/etc as str, str is basically UTF-16 in python
         watch.save_history_text(contents="hello world", timestamp=100, snapshot_id=str(uuid_builder.uuid4()))
@@ -25,31 +24,42 @@ class TestDiffBuilder(unittest.TestCase):
         watch.save_history_text(contents="hello world", timestamp=112, snapshot_id=str(uuid_builder.uuid4()))
         watch.save_history_text(contents="hello world", timestamp=115, snapshot_id=str(uuid_builder.uuid4()))
+        watch.save_history_text(contents="hello world", timestamp=117, snapshot_id=str(uuid_builder.uuid4()))

+        p = watch.get_from_version_based_on_last_viewed
+        assert p == "100", "Correct 'last viewed' timestamp was detected"

-        p = watch.get_next_snapshot_key_to_last_viewed
-        assert p == "112", "Correct last-viewed timestamp was detected"
+        watch['last_viewed'] = 110
+        p = watch.get_from_version_based_on_last_viewed
+        assert p == "109", "Correct 'last viewed' timestamp was detected"

         # When there is only one step of difference from the end of the list, it should return second-last change
         watch['last_viewed'] = 116
-        p = watch.get_next_snapshot_key_to_last_viewed
-        assert p == "115", "Correct 'second last' last-viewed timestamp was detected when using the last timestamp"
+        p = watch.get_from_version_based_on_last_viewed
+        assert p == "115", "Correct 'last viewed' timestamp was detected"

         watch['last_viewed'] = 99
-        p = watch.get_next_snapshot_key_to_last_viewed
-        assert p == "100"
+        p = watch.get_from_version_based_on_last_viewed
+        assert p == "100", "When the 'last viewed' timestamp is less than the oldest snapshot, return oldest"

         watch['last_viewed'] = 200
-        p = watch.get_next_snapshot_key_to_last_viewed
-        assert p == "115", "When the 'last viewed' timestamp is greater than the newest snapshot, return second last "
+        p = watch.get_from_version_based_on_last_viewed
+        assert p == "115", "When the 'last viewed' timestamp is greater than the newest snapshot, return second newest"

         watch['last_viewed'] = 109
-        p = watch.get_next_snapshot_key_to_last_viewed
+        p = watch.get_from_version_based_on_last_viewed
         assert p == "109", "Correct when its the same time"

         # new empty one
         watch = Watch.model(datastore_path='/tmp', default={})
-        p = watch.get_next_snapshot_key_to_last_viewed
+        p = watch.get_from_version_based_on_last_viewed
         assert p == None, "None when no history available"

+        watch.save_history_text(contents="hello world", timestamp=100, snapshot_id=str(uuid_builder.uuid4()))
+        p = watch.get_from_version_based_on_last_viewed
+        assert p == "100", "Correct with only one history snapshot"
+
+        watch['last_viewed'] = 200
+        p = watch.get_from_version_based_on_last_viewed
+        assert p == "100", "Correct with only one history snapshot"

 if __name__ == '__main__':
     unittest.main()
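A hedged sketch of the selection rule these assertions encode (illustrative helper, not the Watch model's actual property): history is keyed by epoch seconds as strings, and get_from_version_based_on_last_viewed appears to pick the newest snapshot at or before 'last_viewed', clamping to the oldest key below the range and to the second-newest key above it, so there is always something sensible to diff against.

def from_version_based_on_last_viewed(history_keys, last_viewed):
    keys = sorted(int(k) for k in history_keys)
    if not keys:
        return None  # no history at all
    if last_viewed < keys[0]:
        return str(keys[0])  # below range: clamp to oldest
    if last_viewed >= keys[-1] and len(keys) > 1:
        return str(keys[-2])  # above range: second newest, so a diff still exists
    return str(max(k for k in keys if k <= last_viewed))

assert from_version_based_on_last_viewed(["100", "109", "112", "115", "117"], 110) == "109"
assert from_version_based_on_last_viewed(["100", "109", "112", "115", "117"], 99) == "100"
assert from_version_based_on_last_viewed(["100", "109", "112", "115", "117"], 200) == "115"
assert from_version_based_on_last_viewed(["100"], 200) == "100"
assert from_version_based_on_last_viewed([], 100) is None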
@@ -76,6 +76,14 @@ def set_more_modified_response():
     return None


+def set_empty_text_response():
+    test_return_data = """<html><body></body></html>"""
+
+    with open("test-datastore/endpoint-content.txt", "w") as f:
+        f.write(test_return_data)
+
+    return None
+
 def wait_for_notification_endpoint_output():
     '''Apprise can take a few seconds to fire'''
     #@todo - could check the apprise object directly instead of looking for this file
@@ -215,9 +223,10 @@ def live_server_setup(live_server):
     def test_method():
         return request.method

-    # Where we POST to as a notification
-    @live_server.app.route('/test_notification_endpoint', methods=['POST', 'GET'])
+    # Where we POST to as a notification, also use a space here to test URL escaping is OK across all tests that use this. ( #2868 )
+    @live_server.app.route('/test_notification endpoint', methods=['POST', 'GET'])
     def test_notification_endpoint():

         with open("test-datastore/notification.txt", "wb") as f:
             # Debug method, dump all POST to file also, used to prove #65
             data = request.stream.read()
@@ -235,8 +244,11 @@ def live_server_setup(live_server):
             f.write(request.content_type)

         print("\n>> Test notification endpoint was hit.\n", data)
-        return "Text was set"

+        content = "Text was set"
+        status_code = request.args.get('status_code', 200)
+        resp = make_response(content, status_code)
+        return resp

     # Just return the verb in the request
     @live_server.app.route('/test-basicauth', methods=['GET'])
@@ -273,15 +285,43 @@ def live_server_setup(live_server):
             <p id="remove">This text should be removed</p>
             <form onsubmit="event.preventDefault();">
             <!-- obfuscated text so that we dont accidentally get a false positive due to conversion of the source :) --->
-            <button name="test-button" onclick="getElementById('remove').remove();getElementById('some-content').innerHTML = atob('SSBzbWVsbCBKYXZhU2NyaXB0IGJlY2F1c2UgdGhlIGJ1dHRvbiB3YXMgcHJlc3NlZCE=')">Click here</button>
-            <div id=some-content></div>
+            <button name="test-button" onclick="
+                getElementById('remove').remove();
+                getElementById('some-content').innerHTML = atob('SSBzbWVsbCBKYXZhU2NyaXB0IGJlY2F1c2UgdGhlIGJ1dHRvbiB3YXMgcHJlc3NlZCE=');
+                getElementById('reflect-text').innerHTML = getElementById('test-input-text').value;
+                ">Click here</button>
+
+            <div id="some-content"></div>
+
             <pre>
             {header_text.lower()}
             </pre>
-          </body>
+
+            <br>
+            <!-- used for testing that the jinja2 compiled here --->
+            <input type="text" value="" id="test-input-text" /><br>
+            <div id="reflect-text">Waiting to reflect text from #test-input-text here</div>
+            </form>
+
+          </body>
         </html>""", 200)
     resp.headers['Content-Type'] = 'text/html'
     return resp

 live_server.start()
+def get_index(client):
+    import inspect
+    # Get the caller's frame (parent function)
+    frame = inspect.currentframe()
+    caller_frame = frame.f_back  # Go back to the caller's frame
+    caller_name = caller_frame.f_code.co_name
+    caller_line = caller_frame.f_lineno
+
+    print(f"Called by: {caller_name}, Line: {caller_line}")
+
+    res = client.get(url_for("index"))
+    with open(f"test-datastore/index-{caller_name}-{caller_line}.html", 'wb') as f:
+        f.write(res.data)
+
+    return res
@@ -2,14 +2,16 @@

 import os
 from flask import url_for
-from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
+from ..util import live_server_setup, wait_for_all_checks, get_index

-def test_setup(client, live_server, measure_memory_usage):
+def test_setup(client, live_server):
     live_server_setup(live_server)


 # Add a site in paused mode, add an invalid filter, we should still have visual selector data ready
 def test_visual_selector_content_ready(client, live_server, measure_memory_usage):
+    live_server.stop()
+    live_server.start()

     import os
     import json
@@ -27,7 +29,7 @@ def test_visual_selector_content_ready(client, live_server, measure_memory_usage
         follow_redirects=True
     )
     assert b"Watch added in Paused state, saving will unpause" in res.data
-    uuid = extract_UUID_from_client(client)
+    uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
     res = client.post(
         url_for("edit_page", uuid=uuid, unpause_on_save=1),
         data={
@@ -87,7 +89,9 @@ def test_visual_selector_content_ready(client, live_server, measure_memory_usage

 def test_basic_browserstep(client, live_server, measure_memory_usage):

     #live_server_setup(live_server)
+    live_server.stop()
+    live_server.start()

     assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test"

     test_url = url_for('test_interactive_html_endpoint', _external=True)
@@ -108,9 +112,13 @@ def test_basic_browserstep(client, live_server, measure_memory_usage):
             "url": test_url,
             "tags": "",
             'fetch_backend': "html_webdriver",
-            'browser_steps-0-operation': 'Click element',
-            'browser_steps-0-selector': 'button[name=test-button]',
-            'browser_steps-0-optional_value': '',
+            'browser_steps-0-operation': 'Enter text in field',
+            'browser_steps-0-selector': '#test-input-text',
+            # Should get set to the actual text (jinja2 rendered)
+            'browser_steps-0-optional_value': "Hello-Jinja2-{% now 'Europe/Berlin', '%Y-%m-%d' %}",
+            'browser_steps-1-operation': 'Click element',
+            'browser_steps-1-selector': 'button[name=test-button]',
+            'browser_steps-1-optional_value': '',
             # For now, cookies doesnt work in headers because it must be a full cookiejar object
             'headers': "testheader: yes\buser-agent: MyCustomAgent",
         },
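A hedged sketch of what the Jinja2 '{% now %}' value above should render to, approximating the template tag with plain Python (zoneinfo is stdlib from 3.9): the current date for Europe/Berlin formatted as %Y-%m-%d, which is why a later assertion only checks for the stable b'Hello-Jinja2-20' prefix rather than an exact date.

from datetime import datetime
from zoneinfo import ZoneInfo

# Roughly what "Hello-Jinja2-{% now 'Europe/Berlin', '%Y-%m-%d' %}" evaluates to
rendered = f"Hello-Jinja2-{datetime.now(ZoneInfo('Europe/Berlin')).strftime('%Y-%m-%d')}"
assert rendered.startswith("Hello-Jinja2-20")  # e.g. Hello-Jinja2-2025-01-31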
@@ -119,7 +127,7 @@ def test_basic_browserstep(client, live_server, measure_memory_usage):
     assert b"unpaused" in res.data
     wait_for_all_checks(client)

-    uuid = extract_UUID_from_client(client)
+    uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
     assert live_server.app.config['DATASTORE'].data['watching'][uuid].history_n >= 1, "Watch history had atleast 1 (everything fetched OK)"

     assert b"This text should be removed" not in res.data
@@ -132,13 +140,32 @@ def test_basic_browserstep(client, live_server, measure_memory_usage):
     assert b"This text should be removed" not in res.data
     assert b"I smell JavaScript because the button was pressed" in res.data

+    assert b'Hello-Jinja2-20' in res.data
+
     assert b"testheader: yes" in res.data
     assert b"user-agent: mycustomagent" in res.data
+    live_server.stop()
+
+def test_non_200_errors_report_browsersteps(client, live_server):
+
+    live_server.stop()
+    live_server.start()
+
+    four_o_four_url = url_for('test_endpoint', status_code=404, _external=True)
+    four_o_four_url = four_o_four_url.replace('localhost.localdomain', 'cdio')
+    four_o_four_url = four_o_four_url.replace('localhost', 'cdio')
+
+    res = client.post(
+        url_for("form_quick_watch_add"),
+        data={"url": four_o_four_url, "tags": '', 'edit_and_watch_submit_button': 'Edit > Watch'},
+        follow_redirects=True
+    )
+
+    assert b"Watch added in Paused state, saving will unpause" in res.data
+    assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test"
+
+    uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
+
+    # now test for 404 errors
+    res = client.post(
+        url_for("edit_page", uuid=uuid, unpause_on_save=1),
@@ -153,12 +180,14 @@ def test_basic_browserstep(client, live_server, measure_memory_usage):
         follow_redirects=True
     )
     assert b"unpaused" in res.data

     wait_for_all_checks(client)

-    res = client.get(url_for("index"))
+    res = get_index(client)
+
+    assert b'Error - 404' in res.data

     client.get(
         url_for("form_delete", uuid="all"),
         follow_redirects=True
     )
@@ -28,6 +28,8 @@ class update_worker(threading.Thread):

     def queue_notification_for_watch(self, notification_q, n_object, watch):
         from changedetectionio import diff
+        from changedetectionio.notification import default_notification_format_for_watch
+
         dates = []
         trigger_text = ''

@@ -44,11 +46,21 @@ class update_worker(threading.Thread):
         else:
             snapshot_contents = "No snapshot/history available, the watch should fetch atleast once."

+        # If we ended up here with "System default"
+        if n_object.get('notification_format') == default_notification_format_for_watch:
+            n_object['notification_format'] = self.datastore.data['settings']['application'].get('notification_format')
+
+        html_colour_enable = False
         # HTML needs linebreak, but MarkDown and Text can use a linefeed
         if n_object.get('notification_format') == 'HTML':
             line_feed_sep = "<br>"
             # Snapshot will be plaintext on the disk, convert to some kind of HTML
             snapshot_contents = snapshot_contents.replace('\n', line_feed_sep)
+        elif n_object.get('notification_format') == 'HTML Color':
+            line_feed_sep = "<br>"
+            # Snapshot will be plaintext on the disk, convert to some kind of HTML
+            snapshot_contents = snapshot_contents.replace('\n', line_feed_sep)
+            html_colour_enable = True
         else:
             line_feed_sep = "\n"

@@ -69,9 +81,9 @@ class update_worker(threading.Thread):
         n_object.update({
             'current_snapshot': snapshot_contents,
-            'diff': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=line_feed_sep),
+            'diff': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=line_feed_sep, html_colour=html_colour_enable),
             'diff_added': diff.render_diff(prev_snapshot, current_snapshot, include_removed=False, line_feed_sep=line_feed_sep),
-            'diff_full': diff.render_diff(prev_snapshot, current_snapshot, include_equal=True, line_feed_sep=line_feed_sep),
+            'diff_full': diff.render_diff(prev_snapshot, current_snapshot, include_equal=True, line_feed_sep=line_feed_sep, html_colour=html_colour_enable),
             'diff_patch': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=line_feed_sep, patch_format=True),
             'diff_removed': diff.render_diff(prev_snapshot, current_snapshot, include_added=False, line_feed_sep=line_feed_sep),
             'notification_timestamp': now,
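A hedged sketch of the dispatch introduced above (illustrative helper; the real worker sets these inline): 'HTML' and 'HTML Color' both switch the line separator to <br>, and only 'HTML Color' additionally asks diff.render_diff for coloured output via the html_colour flag.

def diff_render_options(notification_format: str) -> dict:
    # Both HTML variants need <br> line breaks; only 'HTML Color' wants colour spans
    if notification_format in ('HTML', 'HTML Color'):
        return {'line_feed_sep': '<br>', 'html_colour': notification_format == 'HTML Color'}
    return {'line_feed_sep': '\n', 'html_colour': False}

assert diff_render_options('HTML Color') == {'line_feed_sep': '<br>', 'html_colour': True}
assert diff_render_options('HTML') == {'line_feed_sep': '<br>', 'html_colour': False}
assert diff_render_options('Text') == {'line_feed_sep': '\n', 'html_colour': False}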
@@ -231,7 +243,6 @@ class update_worker(threading.Thread):
                 os.unlink(full_path)

     def run(self):
-        now = time.time()

         while not self.app.config.exit.is_set():
             update_handler = None
@@ -242,6 +253,7 @@ class update_worker(threading.Thread):
                 pass

             else:
+                fetch_start_time = time.time()
                 uuid = queued_item_data.item.get('uuid')
                 self.current_uuid = uuid
                 if uuid in list(self.datastore.data['watching'].keys()) and self.datastore.data['watching'][uuid].get('url'):
@@ -256,7 +268,6 @@ class update_worker(threading.Thread):
                     watch = self.datastore.data['watching'].get(uuid)

                     logger.info(f"Processing watch UUID {uuid} Priority {queued_item_data.priority} URL {watch['url']}")
-                    now = time.time()

                     try:
                         # Processor is what we are using for detecting the "Change"
@@ -276,6 +287,10 @@ class update_worker(threading.Thread):

                         update_handler.call_browser()

+                        # In reality, the actual time of when the change was detected could be a few seconds after this
+                        # For example it should include when the page stopped rendering if using a playwright/chrome type fetch
+                        fetch_start_time = time.time()
+
                         changed_detected, update_obj, contents = update_handler.run_changedetection(watch=watch)

                         # Re #342
@@ -500,7 +515,7 @@ class update_worker(threading.Thread):

                     if not self.datastore.data['watching'].get(uuid):
                         continue
-
+                    #
                     # Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc
                     if process_changedetection_results:
@@ -513,8 +528,6 @@ class update_worker(threading.Thread):
                         except Exception as e:
                             logger.warning(f"UUID: {uuid} Extract <title> as watch title was enabled, but couldn't find a <title>.")

-                        # Now update after running everything
-                        timestamp = round(time.time())
                         try:
                             self.datastore.update_watch(uuid=uuid, update_obj=update_obj)
@@ -530,24 +543,28 @@ class update_worker(threading.Thread):

                             # Small hack so that we sleep just enough to allow 1 second between history snapshots
                             # this is because history.txt indexes/keys snapshots by epoch seconds and we dont want dupe keys
-                            if watch.newest_history_key and int(timestamp) == int(watch.newest_history_key):
+                            # @also - the keys are one per second at the most (for now)
+                            if watch.newest_history_key and int(fetch_start_time) == int(watch.newest_history_key):
                                 logger.warning(
-                                    f"Timestamp {timestamp} already exists, waiting 1 seconds so we have a unique key in history.txt")
-                                timestamp = str(int(timestamp) + 1)
+                                    f"Timestamp {fetch_start_time} already exists, waiting 1 seconds so we have a unique key in history.txt")
+                                fetch_start_time += 1
                                 time.sleep(1)

                             watch.save_history_text(contents=contents,
-                                                    timestamp=timestamp,
+                                                    timestamp=int(fetch_start_time),
                                                     snapshot_id=update_obj.get('previous_md5', 'none'))

-                            if update_handler.fetcher.content:
-                                watch.save_last_fetched_html(contents=update_handler.fetcher.content, timestamp=timestamp)
+                            empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
+                            if update_handler.fetcher.content or (not update_handler.fetcher.content and empty_pages_are_a_change):
+                                # attribute .last_changed is then based on this data
+                                watch.save_last_fetched_html(contents=update_handler.fetcher.content, timestamp=int(fetch_start_time))

                             # Notifications should only trigger on the second time (first time, we gather the initial snapshot)
                             if watch.history_n >= 2:
                                 logger.info(f"Change detected in UUID {uuid} - {watch['url']}")
                                 if not watch.get('notification_muted'):
                                     # @todo only run this if notifications exist
                                     self.send_content_changed_notification(watch_uuid=uuid)

                         except Exception as e:
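A hedged sketch of the unique-key rule above (illustrative helper; the worker does this inline): history.txt keys snapshots by whole epoch seconds, so when a new snapshot would collide with the newest existing key, the timestamp is bumped by one second and the worker sleeps so real time stays ahead of the bumped key.

import time

def unique_history_timestamp(fetch_start_time: float, newest_history_key) -> int:
    ts = int(fetch_start_time)
    if newest_history_key and ts == int(newest_history_key):
        ts += 1          # avoid a duplicate key in history.txt
        time.sleep(1)    # keep wall-clock time ahead of the bumped key
    return ts

assert unique_history_timestamp(1700000000.4, "1700000000") == 1700000001
assert unique_history_timestamp(1700000005.0, "1700000000") == 1700000005
assert unique_history_timestamp(1700000005.0, None) == 1700000005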
@@ -569,15 +586,15 @@ class update_worker(threading.Thread):
                 except Exception as e:
                     pass

-                self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
-                                                                   'last_checked': round(time.time()),
+                self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - fetch_start_time, 3),
+                                                                   'last_checked': int(fetch_start_time),
                                                                    'check_count': count
                                                                    })

                 self.current_uuid = None  # Done
                 self.q.task_done()
-                logger.debug(f"Watch {uuid} done in {time.time()-now:.2f}s")
+                logger.debug(f"Watch {uuid} done in {time.time()-fetch_start_time:.2f}s")

                 # Give the CPU time to interrupt
                 time.sleep(0.1)
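A hedged sketch of the timing refactor across these update_worker hunks: a single fetch_start_time, captured just before run_changedetection(), now feeds the history key, last_checked and the measured fetch duration, where previously several separate time.time() calls ('now', 'timestamp') could disagree by a few seconds.

import time

fetch_start_time = time.time()  # one reference point per check
# ... fetch the page and run change detection here ...
fetch_time = round(time.time() - fetch_start_time, 3)  # duration of this check
last_checked = int(fetch_start_time)                   # when the check started
history_key = int(fetch_start_time)                    # snapshot key in history.txt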
@@ -1,4 +1,3 @@
-version: '3.2'
 services:
     changedetection:
       image: ghcr.io/dgtlmoon/changedetection.io
@@ -12,9 +11,6 @@ services:
 #        environment:
 #            Default listening port, can also be changed with the -p option
 #            - PORT=5000

-#            - PUID=1000
-#            - PGID=1000
-#
 #        Log levels are in descending order. (TRACE is the most detailed one)
 #        Log output levels: TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL
@@ -85,7 +81,7 @@ services:


 #    Sockpuppetbrowser is basically chrome wrapped in an API for allowing fast fetching of web-pages.
-#    RECOMMENDED FOR FETCHING PAGES WITH CHROME
+#    RECOMMENDED FOR FETCHING PAGES WITH CHROME, be sure to enable the "PLAYWRIGHT_DRIVER_URL" env variable in the main changedetection container
 #    sockpuppetbrowser:
 #        hostname: sockpuppetbrowser
 #        image: dgtlmoon/sockpuppetbrowser:latest
BIN
docs/scheduler.png
Normal file
After Width: | Height: | Size: 64 KiB
@@ -35,7 +35,7 @@ dnspython==2.6.1 # related to eventlet fixes
 # jq not available on Windows so must be installed manually

 # Notification library
-apprise==1.9.0
+apprise==1.9.2

 # apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315
 # use any version other than 2.0.x due to https://github.com/eclipse/paho.mqtt.python/issues/814
@@ -95,3 +95,11 @@ babel
 # Needed for > 3.10, https://github.com/microsoft/playwright-python/issues/2096
 greenlet >= 3.0.3

+# Pinned or it causes problems with flask_expects_json which seems unmaintained
+referencing==0.35.1
+
+# Scheduler - Windows seemed to miss a lot of default timezone info (even "UTC" !)
+tzdata
+
+pymongo>=4.3.3
+boto3>=1.26.0