Compare commits
35 Commits
0.49.0
...
abstracted
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
80434fa16a | ||
|
|
db10422415 | ||
|
|
380e571ded | ||
|
|
1c2cfc37aa | ||
|
|
0634fe021d | ||
|
|
04934b6b3b | ||
|
|
ff00417bc5 | ||
|
|
849c5b2293 | ||
|
|
4bf560256b | ||
|
|
7903b03a0c | ||
|
|
5e7c0880c1 | ||
|
|
957aef4ff3 | ||
|
|
8e9a83d8f4 | ||
|
|
5961838143 | ||
|
|
8cf4a8128b | ||
|
|
24c3bfe5ad | ||
|
|
bdd9760f3c | ||
|
|
e37467f649 | ||
|
|
d42fdf0257 | ||
|
|
939fa86582 | ||
|
|
b87c92b9e0 | ||
|
|
4d5535d72c | ||
|
|
ad08219d03 | ||
|
|
82211eef82 | ||
|
|
5d9380609c | ||
|
|
a8b3918fca | ||
|
|
e83fb37fb6 | ||
|
|
6b99afe0f7 | ||
|
|
09ebc6ec63 | ||
|
|
6b1065502e | ||
|
|
d4c470984a | ||
|
|
55da48f719 | ||
|
|
dbd4adf23a | ||
|
|
b1e700b3ff | ||
|
|
1c61b5a623 |
19
.github/workflows/containers.yml
vendored
@@ -103,6 +103,19 @@ jobs:
|
||||
# provenance: false
|
||||
|
||||
# A new tagged release is required, which builds :tag and :latest
|
||||
- name: Docker meta :tag
|
||||
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
|
||||
uses: docker/metadata-action@v5
|
||||
id: meta
|
||||
with:
|
||||
images: |
|
||||
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io
|
||||
ghcr.io/dgtlmoon/changedetection.io
|
||||
tags: |
|
||||
type=semver,pattern={{version}}
|
||||
type=semver,pattern={{major}}.{{minor}}
|
||||
type=semver,pattern={{major}}
|
||||
|
||||
- name: Build and push :tag
|
||||
id: docker_build_tag_release
|
||||
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
|
||||
@@ -111,11 +124,7 @@ jobs:
|
||||
context: ./
|
||||
file: ./Dockerfile
|
||||
push: true
|
||||
tags: |
|
||||
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:${{ github.event.release.tag_name }}
|
||||
ghcr.io/dgtlmoon/changedetection.io:${{ github.event.release.tag_name }}
|
||||
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:latest
|
||||
ghcr.io/dgtlmoon/changedetection.io:latest
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
|
||||
6
.github/workflows/pypi-release.yml
vendored
@@ -45,8 +45,12 @@ jobs:
|
||||
- name: Test that the basic pip built package runs without error
|
||||
run: |
|
||||
set -ex
|
||||
pip3 install dist/changedetection.io*.whl
|
||||
ls -alR
|
||||
|
||||
# Find and install the first .whl file
|
||||
find dist -type f -name "*.whl" -exec pip3 install {} \; -quit
|
||||
changedetection.io -d /tmp -p 10000 &
|
||||
|
||||
sleep 3
|
||||
curl --retry-connrefused --retry 6 http://127.0.0.1:10000/static/styles/pure-min.css >/dev/null
|
||||
curl --retry-connrefused --retry 6 http://127.0.0.1:10000/ >/dev/null
|
||||
|
||||
@@ -5,6 +5,7 @@ recursive-include changedetectionio/content_fetchers *
|
||||
recursive-include changedetectionio/model *
|
||||
recursive-include changedetectionio/processors *
|
||||
recursive-include changedetectionio/static *
|
||||
recursive-include changedetectionio/storage *
|
||||
recursive-include changedetectionio/templates *
|
||||
recursive-include changedetectionio/tests *
|
||||
prune changedetectionio/static/package-lock.json
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
|
||||
|
||||
__version__ = '0.49.0'
|
||||
__version__ = '0.49.4'
|
||||
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from json.decoder import JSONDecodeError
|
||||
|
||||
@@ -112,6 +112,35 @@ def build_watch_json_schema(d):
|
||||
|
||||
schema['properties']['time_between_check'] = build_time_between_check_json_schema()
|
||||
|
||||
schema['properties']['browser_steps'] = {
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"operation": {
|
||||
"type": ["string", "null"],
|
||||
"maxLength": 5000 # Allows null and any string up to 5000 chars (including "")
|
||||
},
|
||||
"selector": {
|
||||
"type": ["string", "null"],
|
||||
"maxLength": 5000
|
||||
},
|
||||
"optional_value": {
|
||||
"type": ["string", "null"],
|
||||
"maxLength": 5000
|
||||
}
|
||||
},
|
||||
"required": ["operation", "selector", "optional_value"],
|
||||
"additionalProperties": False # No extra keys allowed
|
||||
}
|
||||
},
|
||||
{"type": "null"}, # Allows null for `browser_steps`
|
||||
{"type": "array", "maxItems": 0} # Allows empty array []
|
||||
]
|
||||
}
|
||||
|
||||
# headers ?
|
||||
return schema
|
||||
|
||||
|
||||
@@ -22,7 +22,10 @@ from loguru import logger
|
||||
|
||||
browsersteps_sessions = {}
|
||||
io_interface_context = None
|
||||
|
||||
import json
|
||||
import base64
|
||||
import hashlib
|
||||
from flask import Response
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
browser_steps_blueprint = Blueprint('browser_steps', __name__, template_folder="templates")
|
||||
@@ -85,7 +88,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
browsersteps_start_session['browserstepper'] = browser_steps.browsersteps_live_ui(
|
||||
playwright_browser=browsersteps_start_session['browser'],
|
||||
proxy=proxy,
|
||||
start_url=datastore.data['watching'][watch_uuid].get('url'),
|
||||
start_url=datastore.data['watching'][watch_uuid].link,
|
||||
headers=datastore.data['watching'][watch_uuid].get('headers')
|
||||
)
|
||||
|
||||
@@ -160,14 +163,13 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
if not browsersteps_sessions.get(browsersteps_session_id):
|
||||
return make_response('No session exists under that ID', 500)
|
||||
|
||||
|
||||
is_last_step = False
|
||||
# Actions - step/apply/etc, do the thing and return state
|
||||
if request.method == 'POST':
|
||||
# @todo - should always be an existing session
|
||||
step_operation = request.form.get('operation')
|
||||
step_selector = request.form.get('selector')
|
||||
step_optional_value = request.form.get('optional_value')
|
||||
step_n = int(request.form.get('step_n'))
|
||||
is_last_step = strtobool(request.form.get('is_last_step'))
|
||||
|
||||
# @todo try.. accept.. nice errors not popups..
|
||||
@@ -182,16 +184,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Try to find something of value to give back to the user
|
||||
return make_response(str(e).splitlines()[0], 401)
|
||||
|
||||
# Get visual selector ready/update its data (also use the current filter info from the page?)
|
||||
# When the last 'apply' button was pressed
|
||||
# @todo this adds overhead because the xpath selection is happening twice
|
||||
u = browsersteps_sessions[browsersteps_session_id]['browserstepper'].page.url
|
||||
if is_last_step and u:
|
||||
(screenshot, xpath_data) = browsersteps_sessions[browsersteps_session_id]['browserstepper'].request_visualselector_data()
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
if watch:
|
||||
watch.save_screenshot(screenshot=screenshot)
|
||||
watch.save_xpath_data(data=xpath_data)
|
||||
|
||||
# if not this_session.page:
|
||||
# cleanup_playwright_session()
|
||||
@@ -199,31 +191,35 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
# Screenshots and other info only needed on requesting a step (POST)
|
||||
try:
|
||||
state = browsersteps_sessions[browsersteps_session_id]['browserstepper'].get_current_state()
|
||||
(screenshot, xpath_data) = browsersteps_sessions[browsersteps_session_id]['browserstepper'].get_current_state()
|
||||
if is_last_step:
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
u = browsersteps_sessions[browsersteps_session_id]['browserstepper'].page.url
|
||||
if watch and u:
|
||||
watch.save_screenshot(screenshot=screenshot)
|
||||
watch.save_xpath_data(data=xpath_data)
|
||||
|
||||
except playwright._impl._api_types.Error as e:
|
||||
return make_response("Browser session ran out of time :( Please reload this page."+str(e), 401)
|
||||
except Exception as e:
|
||||
return make_response("Error fetching screenshot and element data - " + str(e), 401)
|
||||
|
||||
# Use send_file() which is way faster than read/write loop on bytes
|
||||
import json
|
||||
from tempfile import mkstemp
|
||||
from flask import send_file
|
||||
tmp_fd, tmp_file = mkstemp(text=True, suffix=".json", prefix="changedetectionio-")
|
||||
# SEND THIS BACK TO THE BROWSER
|
||||
|
||||
output = json.dumps({'screenshot': "data:image/jpeg;base64,{}".format(
|
||||
base64.b64encode(state[0]).decode('ascii')),
|
||||
'xpath_data': state[1],
|
||||
'session_age_start': browsersteps_sessions[browsersteps_session_id]['browserstepper'].age_start,
|
||||
'browser_time_remaining': round(remaining)
|
||||
})
|
||||
output = {
|
||||
"screenshot": f"data:image/jpeg;base64,{base64.b64encode(screenshot).decode('ascii')}",
|
||||
"xpath_data": xpath_data,
|
||||
"session_age_start": browsersteps_sessions[browsersteps_session_id]['browserstepper'].age_start,
|
||||
"browser_time_remaining": round(remaining)
|
||||
}
|
||||
json_data = json.dumps(output)
|
||||
|
||||
with os.fdopen(tmp_fd, 'w') as f:
|
||||
f.write(output)
|
||||
# Generate an ETag (hash of the response body)
|
||||
etag_hash = hashlib.md5(json_data.encode('utf-8')).hexdigest()
|
||||
|
||||
response = make_response(send_file(path_or_file=tmp_file,
|
||||
mimetype='application/json; charset=UTF-8',
|
||||
etag=True))
|
||||
# No longer needed
|
||||
os.unlink(tmp_file)
|
||||
# Create the response with ETag
|
||||
response = Response(json_data, mimetype="application/json; charset=UTF-8")
|
||||
response.set_etag(etag_hash)
|
||||
|
||||
return response
|
||||
|
||||
|
||||
@@ -1,14 +1,15 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import time
|
||||
import re
|
||||
from random import randint
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.content_fetchers.helpers import capture_stitched_together_full_page, SCREENSHOT_SIZE_STITCH_THRESHOLD
|
||||
from changedetectionio.content_fetchers.base import manage_user_agent
|
||||
from changedetectionio.safe_jinja import render as jinja_render
|
||||
|
||||
|
||||
|
||||
# Two flags, tell the JS which of the "Selector" or "Value" field should be enabled in the front end
|
||||
# 0- off, 1- on
|
||||
browser_step_ui_config = {'Choose one': '0 0',
|
||||
@@ -31,6 +32,7 @@ browser_step_ui_config = {'Choose one': '0 0',
|
||||
# 'Extract text and use as filter': '1 0',
|
||||
'Goto site': '0 0',
|
||||
'Goto URL': '0 1',
|
||||
'Make all child elements visible': '1 0',
|
||||
'Press Enter': '0 0',
|
||||
'Select by label': '1 1',
|
||||
'Scroll down': '0 0',
|
||||
@@ -38,6 +40,7 @@ browser_step_ui_config = {'Choose one': '0 0',
|
||||
'Wait for seconds': '0 1',
|
||||
'Wait for text': '0 1',
|
||||
'Wait for text in element': '1 1',
|
||||
'Remove elements': '1 0',
|
||||
# 'Press Page Down': '0 0',
|
||||
# 'Press Page Up': '0 0',
|
||||
# weird bug, come back to it later
|
||||
@@ -52,6 +55,8 @@ class steppable_browser_interface():
|
||||
page = None
|
||||
start_url = None
|
||||
|
||||
action_timeout = 10 * 1000
|
||||
|
||||
def __init__(self, start_url):
|
||||
self.start_url = start_url
|
||||
|
||||
@@ -102,7 +107,7 @@ class steppable_browser_interface():
|
||||
return
|
||||
elem = self.page.get_by_text(value)
|
||||
if elem.count():
|
||||
elem.first.click(delay=randint(200, 500), timeout=3000)
|
||||
elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)
|
||||
|
||||
def action_click_element_containing_text_if_exists(self, selector=None, value=''):
|
||||
logger.debug("Clicking element containing text if exists")
|
||||
@@ -111,7 +116,7 @@ class steppable_browser_interface():
|
||||
elem = self.page.get_by_text(value)
|
||||
logger.debug(f"Clicking element containing text - {elem.count()} elements found")
|
||||
if elem.count():
|
||||
elem.first.click(delay=randint(200, 500), timeout=3000)
|
||||
elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)
|
||||
else:
|
||||
return
|
||||
|
||||
@@ -119,7 +124,7 @@ class steppable_browser_interface():
|
||||
if not len(selector.strip()):
|
||||
return
|
||||
|
||||
self.page.fill(selector, value, timeout=10 * 1000)
|
||||
self.page.fill(selector, value, timeout=self.action_timeout)
|
||||
|
||||
def action_execute_js(self, selector, value):
|
||||
response = self.page.evaluate(value)
|
||||
@@ -130,7 +135,7 @@ class steppable_browser_interface():
|
||||
if not len(selector.strip()):
|
||||
return
|
||||
|
||||
self.page.click(selector=selector, timeout=30 * 1000, delay=randint(200, 500))
|
||||
self.page.click(selector=selector, timeout=self.action_timeout + 20 * 1000, delay=randint(200, 500))
|
||||
|
||||
def action_click_element_if_exists(self, selector, value):
|
||||
import playwright._impl._errors as _api_types
|
||||
@@ -138,7 +143,7 @@ class steppable_browser_interface():
|
||||
if not len(selector.strip()):
|
||||
return
|
||||
try:
|
||||
self.page.click(selector, timeout=10 * 1000, delay=randint(200, 500))
|
||||
self.page.click(selector, timeout=self.action_timeout, delay=randint(200, 500))
|
||||
except _api_types.TimeoutError as e:
|
||||
return
|
||||
except _api_types.Error as e:
|
||||
@@ -185,11 +190,29 @@ class steppable_browser_interface():
|
||||
self.page.keyboard.press("PageDown", delay=randint(200, 500))
|
||||
|
||||
def action_check_checkbox(self, selector, value):
|
||||
self.page.locator(selector).check(timeout=1000)
|
||||
self.page.locator(selector).check(timeout=self.action_timeout)
|
||||
|
||||
def action_uncheck_checkbox(self, selector, value):
|
||||
self.page.locator(selector, timeout=1000).uncheck(timeout=1000)
|
||||
self.page.locator(selector).uncheck(timeout=self.action_timeout)
|
||||
|
||||
def action_remove_elements(self, selector, value):
|
||||
"""Removes all elements matching the given selector from the DOM."""
|
||||
self.page.locator(selector).evaluate_all("els => els.forEach(el => el.remove())")
|
||||
|
||||
def action_make_all_child_elements_visible(self, selector, value):
|
||||
"""Recursively makes all child elements inside the given selector fully visible."""
|
||||
self.page.locator(selector).locator("*").evaluate_all("""
|
||||
els => els.forEach(el => {
|
||||
el.style.display = 'block'; // Forces it to be displayed
|
||||
el.style.visibility = 'visible'; // Ensures it's not hidden
|
||||
el.style.opacity = '1'; // Fully opaque
|
||||
el.style.position = 'relative'; // Avoids 'absolute' hiding
|
||||
el.style.height = 'auto'; // Expands collapsed elements
|
||||
el.style.width = 'auto'; // Ensures full visibility
|
||||
el.removeAttribute('hidden'); // Removes hidden attribute
|
||||
el.classList.remove('hidden', 'd-none'); // Removes common CSS hidden classes
|
||||
})
|
||||
""")
|
||||
|
||||
# Responsible for maintaining a live 'context' with the chrome CDP
|
||||
# @todo - how long do contexts live for anyway?
|
||||
@@ -257,6 +280,7 @@ class browsersteps_live_ui(steppable_browser_interface):
|
||||
logger.debug(f"Time to browser setup {time.time()-now:.2f}s")
|
||||
self.page.wait_for_timeout(1 * 1000)
|
||||
|
||||
|
||||
def mark_as_closed(self):
|
||||
logger.debug("Page closed, cleaning up..")
|
||||
|
||||
@@ -274,39 +298,30 @@ class browsersteps_live_ui(steppable_browser_interface):
|
||||
now = time.time()
|
||||
self.page.wait_for_timeout(1 * 1000)
|
||||
|
||||
# The actual screenshot
|
||||
screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=40)
|
||||
|
||||
full_height = self.page.evaluate("document.documentElement.scrollHeight")
|
||||
|
||||
if full_height >= SCREENSHOT_SIZE_STITCH_THRESHOLD:
|
||||
logger.warning(f"Page full Height: {full_height}px longer than {SCREENSHOT_SIZE_STITCH_THRESHOLD}px, using 'stitched screenshot method'.")
|
||||
screenshot = capture_stitched_together_full_page(self.page)
|
||||
else:
|
||||
screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=40)
|
||||
|
||||
logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s")
|
||||
|
||||
now = time.time()
|
||||
self.page.evaluate("var include_filters=''")
|
||||
# Go find the interactive elements
|
||||
# @todo in the future, something smarter that can scan for elements with .click/focus etc event handlers?
|
||||
elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span'
|
||||
xpath_element_js = xpath_element_js.replace('%ELEMENTS%', elements)
|
||||
|
||||
xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")
|
||||
# So the JS will find the smallest one first
|
||||
xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True)
|
||||
logger.debug(f"Time to complete get_current_state of browser {time.time()-now:.2f}s")
|
||||
# except
|
||||
logger.debug(f"Time to scrape xpath element data in browser {time.time()-now:.2f}s")
|
||||
|
||||
# playwright._impl._api_types.Error: Browser closed.
|
||||
# @todo show some countdown timer?
|
||||
return (screenshot, xpath_data)
|
||||
|
||||
def request_visualselector_data(self):
|
||||
"""
|
||||
Does the same that the playwright operation in content_fetcher does
|
||||
This is used to just bump the VisualSelector data so it' ready to go if they click on the tab
|
||||
@todo refactor and remove duplicate code, add include_filters
|
||||
:param xpath_data:
|
||||
:param screenshot:
|
||||
:param current_include_filters:
|
||||
:return:
|
||||
"""
|
||||
import importlib.resources
|
||||
self.page.evaluate("var include_filters=''")
|
||||
xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
|
||||
from changedetectionio.content_fetchers import visualselector_xpath_selectors
|
||||
xpath_element_js = xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors)
|
||||
xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")
|
||||
screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
|
||||
|
||||
return (screenshot, xpath_data)
|
||||
|
||||
104
changedetectionio/content_fetchers/helpers.py
Normal file
@@ -0,0 +1,104 @@
|
||||
|
||||
# Pages with a vertical height longer than this will use the 'stitch together' method.
|
||||
|
||||
# - Many GPUs have a max texture size of 16384x16384px (or lower on older devices).
|
||||
# - If a page is taller than ~8000–10000px, it risks exceeding GPU memory limits.
|
||||
# - This is especially important on headless Chromium, where Playwright may fail to allocate a massive full-page buffer.
|
||||
|
||||
|
||||
# The size at which we will switch to stitching method
|
||||
SCREENSHOT_SIZE_STITCH_THRESHOLD=8000
|
||||
|
||||
from loguru import logger
|
||||
|
||||
def capture_stitched_together_full_page(page):
|
||||
import io
|
||||
import os
|
||||
import time
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
|
||||
MAX_TOTAL_HEIGHT = SCREENSHOT_SIZE_STITCH_THRESHOLD*4 # Maximum total height for the final image (When in stitch mode)
|
||||
MAX_CHUNK_HEIGHT = 4000 # Height per screenshot chunk
|
||||
WARNING_TEXT_HEIGHT = 20 # Height of the warning text overlay
|
||||
|
||||
# Save the original viewport size
|
||||
original_viewport = page.viewport_size
|
||||
now = time.time()
|
||||
|
||||
try:
|
||||
viewport = page.viewport_size
|
||||
page_height = page.evaluate("document.documentElement.scrollHeight")
|
||||
|
||||
# Limit the total capture height
|
||||
capture_height = min(page_height, MAX_TOTAL_HEIGHT)
|
||||
|
||||
images = []
|
||||
total_captured_height = 0
|
||||
|
||||
for offset in range(0, capture_height, MAX_CHUNK_HEIGHT):
|
||||
# Ensure we do not exceed the total height limit
|
||||
chunk_height = min(MAX_CHUNK_HEIGHT, MAX_TOTAL_HEIGHT - total_captured_height)
|
||||
|
||||
# Adjust viewport size for this chunk
|
||||
page.set_viewport_size({"width": viewport["width"], "height": chunk_height})
|
||||
|
||||
# Scroll to the correct position
|
||||
page.evaluate(f"window.scrollTo(0, {offset})")
|
||||
|
||||
# Capture screenshot chunk
|
||||
screenshot_bytes = page.screenshot(type='jpeg', quality=int(os.getenv("SCREENSHOT_QUALITY", 30)))
|
||||
images.append(Image.open(io.BytesIO(screenshot_bytes)))
|
||||
|
||||
total_captured_height += chunk_height
|
||||
|
||||
# Stop if we reached the maximum total height
|
||||
if total_captured_height >= MAX_TOTAL_HEIGHT:
|
||||
break
|
||||
|
||||
# Create the final stitched image
|
||||
stitched_image = Image.new('RGB', (viewport["width"], total_captured_height))
|
||||
y_offset = 0
|
||||
|
||||
# Stitch the screenshot chunks together
|
||||
for img in images:
|
||||
stitched_image.paste(img, (0, y_offset))
|
||||
y_offset += img.height
|
||||
|
||||
logger.debug(f"Screenshot stitched together in {time.time()-now:.2f}s")
|
||||
|
||||
# Overlay warning text if the screenshot was trimmed
|
||||
if page_height > MAX_TOTAL_HEIGHT:
|
||||
draw = ImageDraw.Draw(stitched_image)
|
||||
warning_text = f"WARNING: Screenshot was {page_height}px but trimmed to {MAX_TOTAL_HEIGHT}px because it was too long"
|
||||
|
||||
# Load font (default system font if Arial is unavailable)
|
||||
try:
|
||||
font = ImageFont.truetype("arial.ttf", WARNING_TEXT_HEIGHT) # Arial (Windows/Mac)
|
||||
except IOError:
|
||||
font = ImageFont.load_default() # Default font if Arial not found
|
||||
|
||||
# Get text bounding box (correct method for newer Pillow versions)
|
||||
text_bbox = draw.textbbox((0, 0), warning_text, font=font)
|
||||
text_width = text_bbox[2] - text_bbox[0] # Calculate text width
|
||||
text_height = text_bbox[3] - text_bbox[1] # Calculate text height
|
||||
|
||||
# Define background rectangle (top of the image)
|
||||
draw.rectangle([(0, 0), (viewport["width"], WARNING_TEXT_HEIGHT)], fill="white")
|
||||
|
||||
# Center text horizontally within the warning area
|
||||
text_x = (viewport["width"] - text_width) // 2
|
||||
text_y = (WARNING_TEXT_HEIGHT - text_height) // 2
|
||||
|
||||
# Draw the warning text in red
|
||||
draw.text((text_x, text_y), warning_text, fill="red", font=font)
|
||||
|
||||
# Save or return the final image
|
||||
output = io.BytesIO()
|
||||
stitched_image.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", 30)))
|
||||
screenshot = output.getvalue()
|
||||
|
||||
finally:
|
||||
# Restore the original viewport size
|
||||
page.set_viewport_size(original_viewport)
|
||||
|
||||
return screenshot
|
||||
@@ -4,6 +4,7 @@ from urllib.parse import urlparse
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.content_fetchers.helpers import capture_stitched_together_full_page, SCREENSHOT_SIZE_STITCH_THRESHOLD
|
||||
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
|
||||
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable
|
||||
|
||||
@@ -89,6 +90,7 @@ class fetcher(Fetcher):
|
||||
from playwright.sync_api import sync_playwright
|
||||
import playwright._impl._errors
|
||||
from changedetectionio.content_fetchers import visualselector_xpath_selectors
|
||||
import time
|
||||
self.delete_browser_steps_screenshots()
|
||||
response = None
|
||||
|
||||
@@ -179,6 +181,7 @@ class fetcher(Fetcher):
|
||||
|
||||
self.page.wait_for_timeout(extra_wait * 1000)
|
||||
|
||||
now = time.time()
|
||||
# So we can find an element on the page where its selector was entered manually (maybe not xPath etc)
|
||||
if current_include_filters is not None:
|
||||
self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
|
||||
@@ -190,6 +193,8 @@ class fetcher(Fetcher):
|
||||
self.instock_data = self.page.evaluate("async () => {" + self.instock_data_js + "}")
|
||||
|
||||
self.content = self.page.content()
|
||||
logger.debug(f"Time to scrape xpath element data in browser {time.time() - now:.2f}s")
|
||||
|
||||
# Bug 3 in Playwright screenshot handling
|
||||
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
|
||||
# JPEG is better here because the screenshots can be very very large
|
||||
@@ -199,10 +204,15 @@ class fetcher(Fetcher):
|
||||
# acceptable screenshot quality here
|
||||
try:
|
||||
# The actual screenshot - this always base64 and needs decoding! horrible! huge CPU usage
|
||||
self.screenshot = self.page.screenshot(type='jpeg',
|
||||
full_page=True,
|
||||
quality=int(os.getenv("SCREENSHOT_QUALITY", 72)),
|
||||
)
|
||||
full_height = self.page.evaluate("document.documentElement.scrollHeight")
|
||||
|
||||
if full_height >= SCREENSHOT_SIZE_STITCH_THRESHOLD:
|
||||
logger.warning(
|
||||
f"Page full Height: {full_height}px longer than {SCREENSHOT_SIZE_STITCH_THRESHOLD}px, using 'stitched screenshot method'.")
|
||||
self.screenshot = capture_stitched_together_full_page(self.page)
|
||||
else:
|
||||
self.screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("SCREENSHOT_QUALITY", 30)))
|
||||
|
||||
except Exception as e:
|
||||
# It's likely the screenshot was too long/big and something crashed
|
||||
raise ScreenshotUnavailable(url=url, status_code=self.status_code)
|
||||
|
||||
@@ -29,8 +29,11 @@ function isItemInStock() {
|
||||
'currently unavailable',
|
||||
'dieser artikel ist bald wieder verfügbar',
|
||||
'dostępne wkrótce',
|
||||
'en rupture',
|
||||
'en rupture de stock',
|
||||
'épuisé',
|
||||
'esgotado',
|
||||
'indisponible',
|
||||
'indisponível',
|
||||
'isn\'t in stock right now',
|
||||
'isnt in stock right now',
|
||||
@@ -52,6 +55,8 @@ function isItemInStock() {
|
||||
'niet leverbaar',
|
||||
'niet op voorraad',
|
||||
'no disponible',
|
||||
'non disponibile',
|
||||
'non disponible',
|
||||
'no longer in stock',
|
||||
'no tickets available',
|
||||
'not available',
|
||||
@@ -64,8 +69,10 @@ function isItemInStock() {
|
||||
'não estamos a aceitar encomendas',
|
||||
'out of stock',
|
||||
'out-of-stock',
|
||||
'plus disponible',
|
||||
'prodotto esaurito',
|
||||
'produkt niedostępny',
|
||||
'rupture',
|
||||
'sold out',
|
||||
'sold-out',
|
||||
'stokta yok',
|
||||
|
||||
@@ -41,7 +41,7 @@ const findUpTag = (el) => {
|
||||
|
||||
// Strategy 1: If it's an input, with name, and there's only one, prefer that
|
||||
if (el.name !== undefined && el.name.length) {
|
||||
var proposed = el.tagName + "[name=" + el.name + "]";
|
||||
var proposed = el.tagName + "[name=\"" + CSS.escape(el.name) + "\"]";
|
||||
var proposed_element = window.document.querySelectorAll(proposed);
|
||||
if (proposed_element.length) {
|
||||
if (proposed_element.length === 1) {
|
||||
@@ -102,13 +102,15 @@ function collectVisibleElements(parent, visibleElements) {
|
||||
const children = parent.children;
|
||||
for (let i = 0; i < children.length; i++) {
|
||||
const child = children[i];
|
||||
const computedStyle = window.getComputedStyle(child);
|
||||
|
||||
if (
|
||||
child.nodeType === Node.ELEMENT_NODE &&
|
||||
window.getComputedStyle(child).display !== 'none' &&
|
||||
window.getComputedStyle(child).visibility !== 'hidden' &&
|
||||
computedStyle.display !== 'none' &&
|
||||
computedStyle.visibility !== 'hidden' &&
|
||||
child.offsetWidth >= 0 &&
|
||||
child.offsetHeight >= 0 &&
|
||||
window.getComputedStyle(child).contentVisibility !== 'hidden'
|
||||
computedStyle.contentVisibility !== 'hidden'
|
||||
) {
|
||||
// If the child is an element and is visible, recursively collect visible elements
|
||||
collectVisibleElements(child, visibleElements);
|
||||
@@ -173,6 +175,7 @@ visibleElementsArray.forEach(function (element) {
|
||||
|
||||
// Try to identify any possible currency amounts "Sale: 4000" or "Sale now 3000 Kc", can help with the training.
|
||||
const hasDigitCurrency = (/\d/.test(text.slice(0, 6)) || /\d/.test(text.slice(-6)) ) && /([€£$¥₩₹]|USD|AUD|EUR|Kč|kr|SEK|,–)/.test(text) ;
|
||||
const computedStyle = window.getComputedStyle(element);
|
||||
|
||||
size_pos.push({
|
||||
xpath: xpath_result,
|
||||
@@ -184,10 +187,10 @@ visibleElementsArray.forEach(function (element) {
|
||||
tagName: (element.tagName) ? element.tagName.toLowerCase() : '',
|
||||
// tagtype used by Browser Steps
|
||||
tagtype: (element.tagName.toLowerCase() === 'input' && element.type) ? element.type.toLowerCase() : '',
|
||||
isClickable: window.getComputedStyle(element).cursor === "pointer",
|
||||
isClickable: computedStyle.cursor === "pointer",
|
||||
// Used by the keras trainer
|
||||
fontSize: window.getComputedStyle(element).getPropertyValue('font-size'),
|
||||
fontWeight: window.getComputedStyle(element).getPropertyValue('font-weight'),
|
||||
fontSize: computedStyle.getPropertyValue('font-size'),
|
||||
fontWeight: computedStyle.getPropertyValue('font-weight'),
|
||||
hasDigitCurrency: hasDigitCurrency,
|
||||
label: label,
|
||||
});
|
||||
|
||||
@@ -875,14 +875,14 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
||||
|
||||
is_html_webdriver = False
|
||||
watch_uses_webdriver = False
|
||||
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
||||
is_html_webdriver = True
|
||||
watch_uses_webdriver = True
|
||||
|
||||
from zoneinfo import available_timezones
|
||||
|
||||
# Only works reliably with Playwright
|
||||
visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and is_html_webdriver
|
||||
|
||||
template_args = {
|
||||
'available_processors': processors.available_processors(),
|
||||
'available_timezones': sorted(available_timezones()),
|
||||
@@ -895,14 +895,13 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
'has_default_notification_urls': True if len(datastore.data['settings']['application']['notification_urls']) else False,
|
||||
'has_extra_headers_file': len(datastore.get_all_headers_in_textfile_for_watch(uuid=uuid)) > 0,
|
||||
'has_special_tag_options': _watch_has_tag_options_set(watch=watch),
|
||||
'is_html_webdriver': is_html_webdriver,
|
||||
'watch_uses_webdriver': watch_uses_webdriver,
|
||||
'jq_support': jq_support,
|
||||
'playwright_enabled': os.getenv('PLAYWRIGHT_DRIVER_URL', False),
|
||||
'settings_application': datastore.data['settings']['application'],
|
||||
'timezone_default_config': datastore.data['settings']['application'].get('timezone'),
|
||||
'using_global_webdriver_wait': not default['webdriver_delay'],
|
||||
'uuid': uuid,
|
||||
'visualselector_enabled': visualselector_enabled,
|
||||
'watch': watch
|
||||
}
|
||||
|
||||
|
||||
@@ -171,7 +171,7 @@ class validateTimeZoneName(object):
|
||||
|
||||
class ScheduleLimitDaySubForm(Form):
|
||||
enabled = BooleanField("not set", default=True)
|
||||
start_time = TimeStringField("Start At", default="00:00", render_kw={"placeholder": "HH:MM"}, validators=[validators.Optional()])
|
||||
start_time = TimeStringField("Start At", default="00:00", validators=[validators.Optional()])
|
||||
duration = FormField(TimeDurationForm, label="Run duration")
|
||||
|
||||
class ScheduleLimitForm(Form):
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from typing import List
|
||||
from loguru import logger
|
||||
from lxml import etree
|
||||
from typing import List
|
||||
import json
|
||||
import re
|
||||
|
||||
@@ -298,8 +299,10 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
|
||||
# https://github.com/dgtlmoon/changedetection.io/pull/2041#issuecomment-1848397161w
|
||||
# Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded within HTML tags
|
||||
try:
|
||||
stripped_text_from_html = _parse_json(json.loads(content), json_filter)
|
||||
except json.JSONDecodeError:
|
||||
# .lstrip("\ufeff") strings ByteOrderMark from UTF8 and still lets the UTF work
|
||||
stripped_text_from_html = _parse_json(json.loads(content.lstrip("\ufeff") ), json_filter)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(str(e))
|
||||
|
||||
# Foreach <script json></script> blob.. just return the first that matches json_filter
|
||||
# As a last resort, try to parse the whole <body>
|
||||
|
||||
@@ -69,7 +69,7 @@ def parse_headers_from_text_file(filepath):
|
||||
for l in f.readlines():
|
||||
l = l.strip()
|
||||
if not l.startswith('#') and ':' in l:
|
||||
(k, v) = l.split(':')
|
||||
(k, v) = l.split(':', 1) # Split only on the first colon
|
||||
headers[k.strip()] = v.strip()
|
||||
|
||||
return headers
|
||||
@@ -299,34 +299,17 @@ class model(watch_base):
|
||||
# Save some text file to the appropriate path and bump the history
|
||||
# result_obj from fetch_site_status.run()
|
||||
def save_history_text(self, contents, timestamp, snapshot_id):
|
||||
import brotli
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.storage.filesystem_storage import FileSystemStorage
|
||||
|
||||
logger.trace(f"{self.get('uuid')} - Updating history.txt with timestamp {timestamp}")
|
||||
|
||||
self.ensure_data_dir_exists()
|
||||
|
||||
threshold = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024))
|
||||
skip_brotli = strtobool(os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False'))
|
||||
|
||||
if not skip_brotli and len(contents) > threshold:
|
||||
snapshot_fname = f"{snapshot_id}.txt.br"
|
||||
dest = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
if not os.path.exists(dest):
|
||||
with open(dest, 'wb') as f:
|
||||
f.write(brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT))
|
||||
else:
|
||||
snapshot_fname = f"{snapshot_id}.txt"
|
||||
dest = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
if not os.path.exists(dest):
|
||||
with open(dest, 'wb') as f:
|
||||
f.write(contents.encode('utf-8'))
|
||||
|
||||
# Append to index
|
||||
# @todo check last char was \n
|
||||
index_fname = os.path.join(self.watch_data_dir, "history.txt")
|
||||
with open(index_fname, 'a') as f:
|
||||
f.write("{},{}\n".format(timestamp, snapshot_fname))
|
||||
f.close()
|
||||
# Get storage from singleton store or create a filesystem storage as default
|
||||
store = ChangeDetectionStore.instance if hasattr(ChangeDetectionStore, 'instance') else None
|
||||
storage = store.storage if store and hasattr(store, 'storage') else FileSystemStorage(self.__datastore_path)
|
||||
|
||||
# Use the storage backend to save the history text
|
||||
snapshot_fname = storage.save_history_text(self.get('uuid'), contents, timestamp, snapshot_id)
|
||||
|
||||
self.__newest_history_key = timestamp
|
||||
self.__history_n += 1
|
||||
@@ -352,7 +335,7 @@ class model(watch_base):
|
||||
# Iterate over all history texts and see if something new exists
|
||||
# Always applying .strip() to start/end but optionally replace any other whitespace
|
||||
def lines_contain_something_unique_compared_to_history(self, lines: list, ignore_whitespace=False):
|
||||
local_lines = []
|
||||
local_lines = set([])
|
||||
if lines:
|
||||
if ignore_whitespace:
|
||||
if isinstance(lines[0], str): # Can be either str or bytes depending on what was on the disk
|
||||
@@ -527,7 +510,7 @@ class model(watch_base):
|
||||
def save_error_text(self, contents):
|
||||
self.ensure_data_dir_exists()
|
||||
target_path = os.path.join(self.watch_data_dir, "last-error.txt")
|
||||
with open(target_path, 'w') as f:
|
||||
with open(target_path, 'w', encoding='utf-8') as f:
|
||||
f.write(contents)
|
||||
|
||||
def save_xpath_data(self, data, as_error=False):
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<svg
|
||||
version="1.1"
|
||||
id="Layer_1"
|
||||
id="copy"
|
||||
x="0px"
|
||||
y="0px"
|
||||
viewBox="0 0 115.77 122.88"
|
||||
|
||||
|
Before Width: | Height: | Size: 2.5 KiB After Width: | Height: | Size: 2.5 KiB |
@@ -6,7 +6,7 @@
|
||||
height="7.5005589"
|
||||
width="11.248507"
|
||||
version="1.1"
|
||||
id="Layer_1"
|
||||
id="email"
|
||||
viewBox="0 0 7.1975545 4.7993639"
|
||||
xml:space="preserve"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
|
||||
|
Before Width: | Height: | Size: 1.9 KiB After Width: | Height: | Size: 1.9 KiB |
@@ -1,7 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<svg
|
||||
version="1.1"
|
||||
id="Layer_1"
|
||||
id="schedule"
|
||||
x="0px"
|
||||
y="0px"
|
||||
viewBox="0 0 661.20001 665.40002"
|
||||
|
||||
|
Before Width: | Height: | Size: 5.9 KiB After Width: | Height: | Size: 5.9 KiB |
@@ -221,7 +221,7 @@ $(document).ready(function () {
|
||||
// If you switch to "Click X,y" after an element here is setup, it will give the last co-ords anyway
|
||||
//if (x['isClickable'] || x['tagName'].startsWith('h') || x['tagName'] === 'a' || x['tagName'] === 'button' || x['tagtype'] === 'submit' || x['tagtype'] === 'checkbox' || x['tagtype'] === 'radio' || x['tagtype'] === 'li') {
|
||||
$('select', first_available).val('Click element').change();
|
||||
$('input[type=text]', first_available).first().val(x['xpath']);
|
||||
$('input[type=text]', first_available).first().val(x['xpath']).focus();
|
||||
found_something = true;
|
||||
//}
|
||||
}
|
||||
@@ -305,7 +305,7 @@ $(document).ready(function () {
|
||||
|
||||
if ($(this).val() === 'Click X,Y' && last_click_xy['x'] > 0 && $(elem_value).val().length === 0) {
|
||||
// @todo handle scale
|
||||
$(elem_value).val(last_click_xy['x'] + ',' + last_click_xy['y']);
|
||||
$(elem_value).val(last_click_xy['x'] + ',' + last_click_xy['y']).focus();
|
||||
}
|
||||
}).change();
|
||||
|
||||
|
||||
@@ -40,19 +40,22 @@
|
||||
}
|
||||
}
|
||||
|
||||
@media only screen and (min-width: 760px) {
|
||||
|
||||
#browser-steps .flex-wrapper {
|
||||
display: flex;
|
||||
flex-flow: row;
|
||||
height: 70vh;
|
||||
font-size: 80%;
|
||||
#browser-steps-ui {
|
||||
flex-grow: 1; /* Allow it to grow and fill the available space */
|
||||
flex-shrink: 1; /* Allow it to shrink if needed */
|
||||
flex-basis: 0; /* Start with 0 base width so it stretches as much as possible */
|
||||
background-color: #eee;
|
||||
border-radius: 5px;
|
||||
#browser-steps .flex-wrapper {
|
||||
display: flex;
|
||||
flex-flow: row;
|
||||
height: 70vh;
|
||||
font-size: 80%;
|
||||
|
||||
#browser-steps-ui {
|
||||
flex-grow: 1; /* Allow it to grow and fill the available space */
|
||||
flex-shrink: 1; /* Allow it to shrink if needed */
|
||||
flex-basis: 0; /* Start with 0 base width so it stretches as much as possible */
|
||||
background-color: #eee;
|
||||
border-radius: 5px;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#browser-steps-fieldlist {
|
||||
@@ -63,15 +66,21 @@
|
||||
padding-left: 1rem;
|
||||
overflow-y: scroll;
|
||||
}
|
||||
|
||||
/* this is duplicate :( */
|
||||
#browsersteps-selector-wrapper {
|
||||
height: 100% !important;
|
||||
}
|
||||
}
|
||||
|
||||
/* this is duplicate :( */
|
||||
#browsersteps-selector-wrapper {
|
||||
height: 100%;
|
||||
|
||||
width: 100%;
|
||||
overflow-y: scroll;
|
||||
position: relative;
|
||||
//width: 100%;
|
||||
height: 80vh;
|
||||
|
||||
> img {
|
||||
position: absolute;
|
||||
max-width: 100%;
|
||||
@@ -91,7 +100,6 @@
|
||||
left: 50%;
|
||||
top: 50%;
|
||||
transform: translate(-50%, -50%);
|
||||
margin-left: -40px;
|
||||
z-index: 100;
|
||||
max-width: 350px;
|
||||
text-align: center;
|
||||
|
||||
@@ -46,21 +46,22 @@
|
||||
#browser_steps li > label {
|
||||
display: none; }
|
||||
|
||||
#browser-steps .flex-wrapper {
|
||||
display: flex;
|
||||
flex-flow: row;
|
||||
height: 70vh;
|
||||
font-size: 80%; }
|
||||
#browser-steps .flex-wrapper #browser-steps-ui {
|
||||
flex-grow: 1;
|
||||
/* Allow it to grow and fill the available space */
|
||||
flex-shrink: 1;
|
||||
/* Allow it to shrink if needed */
|
||||
flex-basis: 0;
|
||||
/* Start with 0 base width so it stretches as much as possible */
|
||||
background-color: #eee;
|
||||
border-radius: 5px; }
|
||||
#browser-steps .flex-wrapper #browser-steps-fieldlist {
|
||||
@media only screen and (min-width: 760px) {
|
||||
#browser-steps .flex-wrapper {
|
||||
display: flex;
|
||||
flex-flow: row;
|
||||
height: 70vh;
|
||||
font-size: 80%; }
|
||||
#browser-steps .flex-wrapper #browser-steps-ui {
|
||||
flex-grow: 1;
|
||||
/* Allow it to grow and fill the available space */
|
||||
flex-shrink: 1;
|
||||
/* Allow it to shrink if needed */
|
||||
flex-basis: 0;
|
||||
/* Start with 0 base width so it stretches as much as possible */
|
||||
background-color: #eee;
|
||||
border-radius: 5px; }
|
||||
#browser-steps-fieldlist {
|
||||
flex-grow: 0;
|
||||
/* Don't allow it to grow */
|
||||
flex-shrink: 0;
|
||||
@@ -71,13 +72,16 @@
|
||||
/* Set a max width to prevent overflow */
|
||||
padding-left: 1rem;
|
||||
overflow-y: scroll; }
|
||||
/* this is duplicate :( */
|
||||
#browsersteps-selector-wrapper {
|
||||
height: 100% !important; } }
|
||||
|
||||
/* this is duplicate :( */
|
||||
#browsersteps-selector-wrapper {
|
||||
height: 100%;
|
||||
width: 100%;
|
||||
overflow-y: scroll;
|
||||
position: relative;
|
||||
height: 80vh;
|
||||
/* nice tall skinny one */ }
|
||||
#browsersteps-selector-wrapper > img {
|
||||
position: absolute;
|
||||
@@ -92,7 +96,6 @@
|
||||
left: 50%;
|
||||
top: 50%;
|
||||
transform: translate(-50%, -50%);
|
||||
margin-left: -40px;
|
||||
z-index: 100;
|
||||
max-width: 350px;
|
||||
text-align: center; }
|
||||
|
||||
61
changedetectionio/storage/README.md
Normal file
@@ -0,0 +1,61 @@
|
||||
# Storage Backends for changedetection.io
|
||||
|
||||
This module provides different storage backends for changedetection.io, allowing you to store data in various systems:
|
||||
|
||||
- **FileSystemStorage**: The default storage backend that stores data on the local filesystem.
|
||||
- **MongoDBStorage**: Stores data in a MongoDB database.
|
||||
- **S3Storage**: Stores data in an Amazon S3 bucket.
|
||||
|
||||
## Usage
|
||||
|
||||
The storage backend is automatically selected based on the datastore path provided when initializing the application:
|
||||
|
||||
- For filesystem storage (default): `/datastore`
|
||||
- For MongoDB storage: `mongodb://username:password@host:port/database`
|
||||
- For S3 storage: `s3://bucket-name/optional-prefix`
|
||||
|
||||
## Configuration
|
||||
|
||||
### Filesystem Storage
|
||||
|
||||
The default storage backend. Simply specify a directory path:
|
||||
|
||||
```
|
||||
changedetection.io -d /path/to/datastore
|
||||
```
|
||||
|
||||
### MongoDB Storage
|
||||
|
||||
To use MongoDB storage, specify a MongoDB connection URI:
|
||||
|
||||
```
|
||||
changedetection.io -d mongodb://username:password@host:port/database
|
||||
```
|
||||
|
||||
Make sure to install the required dependencies:
|
||||
|
||||
```
|
||||
pip install -r requirements-storage.txt
|
||||
```
|
||||
|
||||
### Amazon S3 Storage
|
||||
|
||||
To use S3 storage, specify an S3 URI:
|
||||
|
||||
```
|
||||
changedetection.io -d s3://bucket-name/optional-prefix
|
||||
```
|
||||
|
||||
Make sure to:
|
||||
1. Install the required dependencies: `pip install -r requirements-storage.txt`
|
||||
2. Configure AWS credentials using environment variables or IAM roles:
|
||||
- Set `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` environment variables
|
||||
- Or use an IAM role when running on AWS EC2/ECS/EKS
|
||||
|
||||
## Custom Storage Backends
|
||||
|
||||
You can create custom storage backends by:
|
||||
|
||||
1. Subclassing the `StorageBase` abstract class in `storage_base.py`
|
||||
2. Implementing all required methods
|
||||
3. Adding your backend to the `storage_factory.py` file
|
||||
1
changedetectionio/storage/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# This module contains storage backend implementations
|
||||
449
changedetectionio/storage/filesystem_storage.py
Normal file
@@ -0,0 +1,449 @@
|
||||
import os
|
||||
import shutil
|
||||
import json
|
||||
import brotli
|
||||
import zlib
|
||||
import pathlib
|
||||
from loguru import logger
|
||||
from os import path
|
||||
|
||||
from .storage_base import StorageBase
|
||||
|
||||
class FileSystemStorage(StorageBase):
|
||||
"""File system storage backend"""
|
||||
|
||||
def __init__(self, datastore_path, include_default_watches=True, version_tag="0.0.0"):
|
||||
"""Initialize the file system storage backend
|
||||
|
||||
Args:
|
||||
datastore_path (str): Path to the datastore
|
||||
include_default_watches (bool): Whether to include default watches
|
||||
version_tag (str): Version tag
|
||||
"""
|
||||
self.datastore_path = datastore_path
|
||||
self.json_store_path = "{}/url-watches.json".format(self.datastore_path)
|
||||
logger.info(f"Datastore path is '{self.json_store_path}'")
|
||||
|
||||
def load_data(self):
|
||||
"""Load data from the file system
|
||||
|
||||
Returns:
|
||||
dict: The loaded data
|
||||
"""
|
||||
if not path.isfile(self.json_store_path):
|
||||
return None
|
||||
|
||||
with open(self.json_store_path) as json_file:
|
||||
return json.load(json_file)
|
||||
|
||||
def save_data(self, data):
|
||||
"""Save data to the file system
|
||||
|
||||
Args:
|
||||
data (dict): The data to save
|
||||
"""
|
||||
try:
|
||||
# Re #286 - First write to a temp file, then confirm it looks OK and rename it
|
||||
# This is a fairly basic strategy to deal with the case that the file is corrupted,
|
||||
# system was out of memory, out of RAM etc
|
||||
with open(self.json_store_path+".tmp", 'w') as json_file:
|
||||
json.dump(data, json_file, indent=4)
|
||||
os.replace(self.json_store_path+".tmp", self.json_store_path)
|
||||
except Exception as e:
|
||||
logger.error(f"Error writing JSON!! (Main JSON file save was skipped) : {str(e)}")
|
||||
raise e
|
||||
|
||||
def get_watch_dir(self, watch_uuid):
|
||||
"""Get the directory for a watch
|
||||
|
||||
Args:
|
||||
watch_uuid (str): Watch UUID
|
||||
|
||||
Returns:
|
||||
str: The watch directory
|
||||
"""
|
||||
return os.path.join(self.datastore_path, watch_uuid)
|
||||
|
||||
def ensure_data_dir_exists(self, watch_uuid):
|
||||
"""Ensure the data directory exists for a watch
|
||||
|
||||
Args:
|
||||
watch_uuid (str): Watch UUID
|
||||
"""
|
||||
watch_dir = self.get_watch_dir(watch_uuid)
|
||||
if not os.path.isdir(watch_dir):
|
||||
logger.debug(f"> Creating data dir {watch_dir}")
|
||||
os.makedirs(watch_dir, exist_ok=True)
|
||||
|
||||
def save_history_text(self, watch_uuid, contents, timestamp, snapshot_id):
|
||||
"""Save history text to the file system
|
||||
|
||||
Args:
|
||||
watch_uuid (str): Watch UUID
|
||||
contents (str): Contents to save
|
||||
timestamp (int): Timestamp
|
||||
snapshot_id (str): Snapshot ID
|
||||
|
||||
Returns:
|
||||
str: Snapshot filename
|
||||
"""
|
||||
self.ensure_data_dir_exists(watch_uuid)
|
||||
|
||||
threshold = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024))
|
||||
skip_brotli = os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False').lower() in ('true', '1', 't')
|
||||
|
||||
watch_dir = self.get_watch_dir(watch_uuid)
|
||||
|
||||
if not skip_brotli and len(contents) > threshold:
|
||||
snapshot_fname = f"{snapshot_id}.txt.br"
|
||||
dest = os.path.join(watch_dir, snapshot_fname)
|
||||
if not os.path.exists(dest):
|
||||
with open(dest, 'wb') as f:
|
||||
f.write(brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT))
|
||||
else:
|
||||
snapshot_fname = f"{snapshot_id}.txt"
|
||||
dest = os.path.join(watch_dir, snapshot_fname)
|
||||
if not os.path.exists(dest):
|
||||
with open(dest, 'wb') as f:
|
||||
f.write(contents.encode('utf-8'))
|
||||
|
||||
# Append to index
|
||||
index_fname = os.path.join(watch_dir, "history.txt")
|
||||
with open(index_fname, 'a') as f:
|
||||
f.write("{},{}\n".format(timestamp, snapshot_fname))
|
||||
|
||||
return snapshot_fname
|
||||
|
||||
def get_history(self, watch_uuid):
|
||||
"""Get history for a watch
|
||||
|
||||
Args:
|
||||
watch_uuid (str): Watch UUID
|
||||
|
||||
Returns:
|
||||
dict: The history with timestamp keys and snapshot IDs as values
|
||||
"""
|
||||
tmp_history = {}
|
||||
|
||||
watch_dir = self.get_watch_dir(watch_uuid)
|
||||
if not os.path.isdir(watch_dir):
|
||||
return tmp_history
|
||||
|
||||
# Read the history file as a dict
|
||||
fname = os.path.join(watch_dir, "history.txt")
|
||||
if os.path.isfile(fname):
|
||||
logger.debug(f"Reading watch history index for {watch_uuid}")
|
||||
with open(fname, "r") as f:
|
||||
for i in f.readlines():
|
||||
if ',' in i:
|
||||
k, v = i.strip().split(',', 2)
|
||||
|
||||
# The index history could contain a relative path, so we need to make the fullpath
|
||||
# so that python can read it
|
||||
if not '/' in v and not '\'' in v:
|
||||
v = os.path.join(watch_dir, v)
|
||||
else:
|
||||
# It's possible that they moved the datadir on older versions
|
||||
# So the snapshot exists but is in a different path
|
||||
snapshot_fname = v.split('/')[-1]
|
||||
proposed_new_path = os.path.join(watch_dir, snapshot_fname)
|
||||
if not os.path.exists(v) and os.path.exists(proposed_new_path):
|
||||
v = proposed_new_path
|
||||
|
||||
tmp_history[k] = v
|
||||
|
||||
return tmp_history
|
||||
|
||||
def get_history_snapshot(self, watch_uuid, timestamp):
|
||||
"""Get a history snapshot from the file system
|
||||
|
||||
Args:
|
||||
watch_uuid (str): Watch UUID
|
||||
timestamp (int): Timestamp
|
||||
|
||||
Returns:
|
||||
str: The snapshot content
|
||||
"""
|
||||
history = self.get_history(watch_uuid)
|
||||
if not timestamp in history:
|
||||
return None
|
||||
|
||||
filepath = history[timestamp]
|
||||
|
||||
# See if a brotli versions exists and switch to that
|
||||
if not filepath.endswith('.br') and os.path.isfile(f"{filepath}.br"):
|
||||
filepath = f"{filepath}.br"
|
||||
|
||||
# OR in the backup case that the .br does not exist, but the plain one does
|
||||
if filepath.endswith('.br') and not os.path.isfile(filepath):
|
||||
if os.path.isfile(filepath.replace('.br', '')):
|
||||
filepath = filepath.replace('.br', '')
|
||||
|
||||
if filepath.endswith('.br'):
|
||||
# Brotli doesnt have a fileheader to detect it, so we rely on filename
|
||||
# https://www.rfc-editor.org/rfc/rfc7932
|
||||
with open(filepath, 'rb') as f:
|
||||
return(brotli.decompress(f.read()).decode('utf-8'))
|
||||
|
||||
with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
|
||||
return f.read()
|
||||
|
||||
def save_screenshot(self, watch_uuid, screenshot, as_error=False):
|
||||
"""Save a screenshot for a watch
|
||||
|
||||
Args:
|
||||
watch_uuid (str): Watch UUID
|
||||
screenshot (bytes): Screenshot data
|
||||
as_error (bool): Whether this is an error screenshot
|
||||
"""
|
||||
self.ensure_data_dir_exists(watch_uuid)
|
||||
watch_dir = self.get_watch_dir(watch_uuid)
|
||||
|
||||
if as_error:
|
||||
target_path = os.path.join(watch_dir, "last-error-screenshot.png")
|
||||
else:
|
||||
target_path = os.path.join(watch_dir, "last-screenshot.png")
|
||||
|
||||
with open(target_path, 'wb') as f:
|
||||
f.write(screenshot)
|
||||
|
||||
def get_screenshot(self, watch_uuid, is_error=False):
|
||||
"""Get a screenshot for a watch
|
||||
|
||||
Args:
|
||||
watch_uuid (str): Watch UUID
|
||||
is_error (bool): Whether to get the error screenshot
|
||||
|
||||
Returns:
|
||||
str or None: The screenshot path or None if not available
|
||||
"""
|
||||
watch_dir = self.get_watch_dir(watch_uuid)
|
||||
|
||||
if is_error:
|
||||
fname = os.path.join(watch_dir, "last-error-screenshot.png")
|
||||
else:
|
||||
fname = os.path.join(watch_dir, "last-screenshot.png")
|
||||
|
||||
if os.path.isfile(fname):
|
||||
return fname
|
||||
|
||||
return None
|
||||
|
||||
def save_error_text(self, watch_uuid, contents):
|
||||
"""Save error text for a watch
|
||||
|
||||
Args:
|
||||
watch_uuid (str): Watch UUID
|
||||
contents (str): Error contents
|
||||
"""
|
||||
self.ensure_data_dir_exists(watch_uuid)
|
||||
watch_dir = self.get_watch_dir(watch_uuid)
|
||||
|
||||
target_path = os.path.join(watch_dir, "last-error.txt")
|
||||
with open(target_path, 'w', encoding='utf-8') as f:
|
||||
f.write(contents)
|
||||
|
||||
def get_error_text(self, watch_uuid):
|
||||
"""Get error text for a watch
|
||||
|
||||
Args:
|
||||
watch_uuid (str): Watch UUID
|
||||
|
||||
Returns:
|
||||
str or False: The error text or False if not available
|
||||
"""
|
||||
watch_dir = self.get_watch_dir(watch_uuid)
|
||||
fname = os.path.join(watch_dir, "last-error.txt")
|
||||
|
||||
if os.path.isfile(fname):
|
||||
with open(fname, 'r') as f:
|
||||
return f.read()
|
||||
|
||||
return False
|
||||
|
||||
def save_xpath_data(self, watch_uuid, data, as_error=False):
|
||||
"""Save XPath data for a watch
|
||||
|
||||
Args:
|
||||
watch_uuid (str): Watch UUID
|
||||
data (dict): XPath data
|
||||
as_error (bool): Whether this is error data
|
||||
"""
|
||||
self.ensure_data_dir_exists(watch_uuid)
|
||||
watch_dir = self.get_watch_dir(watch_uuid)
|
||||
|
||||
if as_error:
|
||||
target_path = os.path.join(watch_dir, "elements-error.deflate")
|
||||
else:
|
||||
target_path = os.path.join(watch_dir, "elements.deflate")
|
||||
|
||||
with open(target_path, 'wb') as f:
|
||||
f.write(zlib.compress(json.dumps(data).encode()))
|
||||
|
||||
def get_xpath_data(self, watch_uuid, is_error=False):
|
||||
"""Get XPath data for a watch
|
||||
|
||||
Args:
|
||||
watch_uuid (str): Watch UUID
|
||||
is_error (bool): Whether to get error data
|
||||
|
||||
Returns:
|
||||
dict or None: The XPath data or None if not available
|
||||
"""
|
||||
watch_dir = self.get_watch_dir(watch_uuid)
|
||||
|
||||
if is_error:
|
||||
path = os.path.join(watch_dir, "elements-error.deflate")
|
||||
else:
|
||||
path = os.path.join(watch_dir, "elements.deflate")
|
||||
|
||||
if not os.path.isfile(path):
|
||||
return None
|
||||
|
||||
with open(path, 'rb') as f:
|
||||
return json.loads(zlib.decompress(f.read()).decode('utf-8'))
|
||||
|
||||
def save_last_fetched_html(self, watch_uuid, timestamp, contents):
|
||||
"""Save last fetched HTML for a watch
|
||||
|
||||
Args:
|
||||
watch_uuid (str): Watch UUID
|
||||
timestamp (int): Timestamp
|
||||
contents (str): HTML contents
|
||||
"""
|
||||
self.ensure_data_dir_exists(watch_uuid)
|
||||
watch_dir = self.get_watch_dir(watch_uuid)
|
||||
|
||||
snapshot_fname = f"{timestamp}.html.br"
|
||||
filepath = os.path.join(watch_dir, snapshot_fname)
|
||||
|
||||
with open(filepath, 'wb') as f:
|
||||
contents = contents.encode('utf-8') if isinstance(contents, str) else contents
|
||||
try:
|
||||
f.write(brotli.compress(contents))
|
||||
except Exception as e:
|
||||
logger.warning(f"{watch_uuid} - Unable to compress snapshot, saving as raw data to {filepath}")
|
||||
logger.warning(e)
|
||||
f.write(contents)
|
||||
|
||||
# Prune old snapshots - keep only the newest 2
|
||||
self._prune_last_fetched_html_snapshots(watch_uuid)
|
||||
|
||||
def _prune_last_fetched_html_snapshots(self, watch_uuid):
|
||||
"""Prune old HTML snapshots
|
||||
|
||||
Args:
|
||||
watch_uuid (str): Watch UUID
|
||||
"""
|
||||
watch_dir = self.get_watch_dir(watch_uuid)
|
||||
history = self.get_history(watch_uuid)
|
||||
|
||||
dates = list(history.keys())
|
||||
dates.reverse()
|
||||
|
||||
for index, timestamp in enumerate(dates):
|
||||
snapshot_fname = f"{timestamp}.html.br"
|
||||
filepath = os.path.join(watch_dir, snapshot_fname)
|
||||
|
||||
# Keep only the first 2
|
||||
if index > 1 and os.path.isfile(filepath):
|
||||
os.remove(filepath)
|
||||
|
||||
def get_fetched_html(self, watch_uuid, timestamp):
|
||||
"""Get fetched HTML for a watch
|
||||
|
||||
Args:
|
||||
watch_uuid (str): Watch UUID
|
||||
timestamp (int): Timestamp
|
||||
|
||||
Returns:
|
||||
str or False: The HTML or False if not available
|
||||
"""
|
||||
watch_dir = self.get_watch_dir(watch_uuid)
|
||||
|
||||
snapshot_fname = f"{timestamp}.html.br"
|
||||
filepath = os.path.join(watch_dir, snapshot_fname)
|
||||
|
||||
if os.path.isfile(filepath):
|
||||
with open(filepath, 'rb') as f:
|
||||
return brotli.decompress(f.read()).decode('utf-8')
|
||||
|
||||
return False
|
||||
|
||||
def save_last_text_fetched_before_filters(self, watch_uuid, contents):
|
||||
"""Save the last text fetched before filters
|
||||
|
||||
Args:
|
||||
watch_uuid (str): Watch UUID
|
||||
contents (str): Text contents
|
||||
"""
|
||||
self.ensure_data_dir_exists(watch_uuid)
|
||||
watch_dir = self.get_watch_dir(watch_uuid)
|
||||
|
||||
filepath = os.path.join(watch_dir, 'last-fetched.br')
|
||||
with open(filepath, 'wb') as f:
|
||||
f.write(brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT))
|
||||
|
||||
def get_last_fetched_text_before_filters(self, watch_uuid):
|
||||
"""Get the last text fetched before filters
|
||||
|
||||
Args:
|
||||
watch_uuid (str): Watch UUID
|
||||
|
||||
Returns:
|
||||
str: The text
|
||||
"""
|
||||
watch_dir = self.get_watch_dir(watch_uuid)
|
||||
filepath = os.path.join(watch_dir, 'last-fetched.br')
|
||||
|
||||
if not os.path.isfile(filepath):
|
||||
# If a previous attempt doesnt yet exist, just snarf the previous snapshot instead
|
||||
history = self.get_history(watch_uuid)
|
||||
dates = list(history.keys())
|
||||
|
||||
if len(dates):
|
||||
return self.get_history_snapshot(watch_uuid, dates[-1])
|
||||
else:
|
||||
return ''
|
||||
|
||||
with open(filepath, 'rb') as f:
|
||||
return brotli.decompress(f.read()).decode('utf-8')
|
||||
|
||||
def visualselector_data_is_ready(self, watch_uuid):
|
||||
"""Check if visual selector data is ready
|
||||
|
||||
Args:
|
||||
watch_uuid (str): Watch UUID
|
||||
|
||||
Returns:
|
||||
bool: Whether visual selector data is ready
|
||||
"""
|
||||
watch_dir = self.get_watch_dir(watch_uuid)
|
||||
screenshot_filename = os.path.join(watch_dir, "last-screenshot.png")
|
||||
elements_index_filename = os.path.join(watch_dir, "elements.deflate")
|
||||
|
||||
return path.isfile(screenshot_filename) and path.isfile(elements_index_filename)
|
||||
|
||||
def clear_watch_history(self, watch_uuid):
|
||||
"""Clear history for a watch
|
||||
|
||||
Args:
|
||||
watch_uuid (str): Watch UUID
|
||||
"""
|
||||
watch_dir = self.get_watch_dir(watch_uuid)
|
||||
if not os.path.exists(watch_dir):
|
||||
return
|
||||
|
||||
# Delete all files but keep the directory
|
||||
for item in pathlib.Path(watch_dir).glob("*.*"):
|
||||
os.unlink(item)
|
||||
|
||||
def delete_watch(self, watch_uuid):
|
||||
"""Delete a watch
|
||||
|
||||
Args:
|
||||
watch_uuid (str): Watch UUID
|
||||
"""
|
||||
watch_dir = self.get_watch_dir(watch_uuid)
|
||||
if os.path.exists(watch_dir):
|
||||
shutil.rmtree(watch_dir)
|
||||
466
changedetectionio/storage/mongodb_storage.py
Normal file
@@ -0,0 +1,466 @@
|
||||
import os
|
||||
from copy import deepcopy
|
||||
|
||||
import brotli
|
||||
import zlib
|
||||
import json
|
||||
import time
|
||||
from loguru import logger
|
||||
from pymongo import MongoClient
|
||||
from urllib.parse import urlparse
|
||||
import base64
|
||||
|
||||
from .storage_base import StorageBase
|
||||
|
||||
class MongoDBStorage(StorageBase):
|
||||
"""MongoDB storage backend"""
|
||||
|
||||
def __init__(self, datastore_path, include_default_watches=True, version_tag="0.0.0"):
|
||||
"""Initialize the MongoDB storage backend
|
||||
|
||||
Args:
|
||||
datastore_path (str): MongoDB connection URI
|
||||
include_default_watches (bool): Whether to include default watches
|
||||
version_tag (str): Version tag
|
||||
"""
|
||||
# Parse MongoDB URI from datastore_path
|
||||
parsed_uri = urlparse(datastore_path)
|
||||
self.db_name = parsed_uri.path.lstrip('/')
|
||||
if not self.db_name:
|
||||
self.db_name = 'changedetection'
|
||||
|
||||
# Connect to MongoDB
|
||||
self.client = MongoClient(datastore_path)
|
||||
self.db = self.client[self.db_name]
|
||||
|
||||
# Collections
|
||||
self.app_collection = self.db['app']
|
||||
self.watches_collection = self.db['watches']
|
||||
self.snapshots_collection = self.db['snapshots']
|
||||
self.history_collection = self.db['history']
|
||||
self.error_collection = self.db['errors']
|
||||
self.xpath_collection = self.db['xpath']
|
||||
self.html_collection = self.db['html']
|
||||
|
||||
logger.info(f"MongoDB storage initialized, connected to {datastore_path}")
|
||||
|
||||
def load_data(self):
|
||||
"""Load data from MongoDB
|
||||
|
||||
Returns:
|
||||
dict: The loaded data
|
||||
"""
|
||||
app_data = self.app_collection.find_one({'_id': 'app_data'})
|
||||
if not app_data:
|
||||
return None
|
||||
|
||||
# Remove MongoDB _id field
|
||||
if '_id' in app_data:
|
||||
del app_data['_id']
|
||||
|
||||
return app_data
|
||||
|
||||
def save_data(self, data):
|
||||
"""Save data to MongoDB
|
||||
|
||||
Args:
|
||||
data (dict): The data to save
|
||||
"""
|
||||
try:
|
||||
# Create a copy to modify
|
||||
data_copy = deepcopy(data)
|
||||
|
||||
# Set _id for app data
|
||||
data_copy['_id'] = 'app_data'
|
||||
|
||||
# Insert or update app data
|
||||
self.app_collection.replace_one({'_id': 'app_data'}, data_copy, upsert=True)
|
||||
|
||||
# Also store watches separately for more granular access
|
||||
# This provides a safety net in case of corrupted app_data
|
||||
watches = data.get('watching', {})
|
||||
for uuid, watch in watches.items():
|
||||
if isinstance(watch, dict): # Handle case where watch is a Watch object
|
||||
watch_copy = deepcopy(dict(watch))
|
||||
else:
|
||||
watch_copy = deepcopy(watch)
|
||||
watch_copy['_id'] = uuid
|
||||
self.watches_collection.replace_one({'_id': uuid}, watch_copy, upsert=True)
|
||||
|
||||
# Also store tags separately
|
||||
if 'settings' in data and 'application' in data['settings'] and 'tags' in data['settings']['application']:
|
||||
tags = data['settings']['application']['tags']
|
||||
for uuid, tag in tags.items():
|
||||
if isinstance(tag, dict): # Handle case where tag is a Tag object
|
||||
tag_copy = deepcopy(dict(tag))
|
||||
else:
|
||||
tag_copy = deepcopy(tag)
|
||||
tag_copy['_id'] = uuid
|
||||
self.db['tags'].replace_one({'_id': uuid}, tag_copy, upsert=True)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error writing to MongoDB: {str(e)}")
|
||||
raise e
|
||||
|
||||
def ensure_data_dir_exists(self, watch_uuid):
|
||||
"""Ensure the data directory exists for a watch
|
||||
|
||||
Args:
|
||||
watch_uuid (str): Watch UUID
|
||||
"""
|
||||
# MongoDB doesn't need directories, this is a no-op
|
||||
pass
|
||||
|
||||
def save_history_text(self, watch_uuid, contents, timestamp, snapshot_id):
|
||||
"""Save history text to MongoDB
|
||||
|
||||
Args:
|
||||
watch_uuid (str): Watch UUID
|
||||
contents (str): Contents to save
|
||||
timestamp (int): Timestamp
|
||||
snapshot_id (str): Snapshot ID
|
||||
|
||||
Returns:
|
||||
str: Snapshot ID
|
||||
"""
|
||||
# Compress the contents
|
||||
compressed_contents = brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT)
|
||||
|
||||
# Store the snapshot
|
||||
snapshot_data = {
|
||||
'_id': f"{watch_uuid}:{timestamp}",
|
||||
'watch_uuid': watch_uuid,
|
||||
'timestamp': timestamp,
|
||||
'snapshot_id': snapshot_id,
|
||||
'contents': base64.b64encode(compressed_contents).decode('ascii'),
|
||||
'compressed': True
|
||||
}
|
||||
|
||||
self.snapshots_collection.replace_one({'_id': snapshot_data['_id']}, snapshot_data, upsert=True)
|
||||
|
||||
# Update history index
|
||||
history_entry = {
|
||||
'watch_uuid': watch_uuid,
|
||||
'timestamp': timestamp,
|
||||
'snapshot_id': snapshot_id
|
||||
}
|
||||
|
||||
self.history_collection.replace_one(
|
||||
{'watch_uuid': watch_uuid, 'timestamp': timestamp},
|
||||
history_entry,
|
||||
upsert=True
|
||||
)
|
||||
|
||||
return snapshot_id
|
||||
|
||||
def get_history(self, watch_uuid):
    """Get history for a watch.

    Args:
        watch_uuid (str): Watch UUID

    Returns:
        dict: Timestamps (as strings) mapped to snapshot IDs, oldest first
    """
    cursor = self.history_collection.find({'watch_uuid': watch_uuid}).sort('timestamp', 1)
    return {str(doc['timestamp']): doc['snapshot_id'] for doc in cursor}
|
||||
|
||||
def get_history_snapshot(self, watch_uuid, timestamp):
    """Get a history snapshot from MongoDB.

    Args:
        watch_uuid (str): Watch UUID
        timestamp (int): Timestamp

    Returns:
        str or None: The snapshot content, or None when no snapshot exists
    """
    doc = self.snapshots_collection.find_one({'_id': f"{watch_uuid}:{timestamp}"})
    if doc is None:
        return None

    if not doc.get('compressed', False):
        return doc['contents']

    # Stored as base64-wrapped brotli — unwrap and decompress.
    return brotli.decompress(base64.b64decode(doc['contents'])).decode('utf-8')
|
||||
|
||||
def save_screenshot(self, watch_uuid, screenshot, as_error=False):
    """Save a screenshot for a watch.

    Args:
        watch_uuid (str): Watch UUID
        screenshot (bytes): Screenshot data
        as_error (bool): Whether this is an error screenshot
    """
    # Error screenshots live in their own collection.
    target = self.db['error_screenshots' if as_error else 'screenshots']

    record = {
        '_id': watch_uuid,
        'watch_uuid': watch_uuid,
        # base64 keeps the binary payload BSON-friendly.
        'screenshot': base64.b64encode(screenshot).decode('ascii'),
        'timestamp': int(time.time()),
    }

    target.replace_one({'_id': watch_uuid}, record, upsert=True)
|
||||
|
||||
def get_screenshot(self, watch_uuid, is_error=False):
    """Get a screenshot for a watch.

    Args:
        watch_uuid (str): Watch UUID
        is_error (bool): Whether to fetch the error screenshot

    Returns:
        bytes or None: The screenshot data or None if not available
    """
    target = self.db['error_screenshots' if is_error else 'screenshots']
    record = target.find_one({'_id': watch_uuid})

    # Payload was stored base64-encoded; decode back to raw bytes.
    return base64.b64decode(record['screenshot']) if record else None
|
||||
|
||||
def save_error_text(self, watch_uuid, contents):
    """Save error text for a watch.

    Args:
        watch_uuid (str): Watch UUID
        contents (str): Error contents
    """
    record = {
        '_id': watch_uuid,
        'watch_uuid': watch_uuid,
        'error_text': contents,
        'timestamp': int(time.time()),
    }
    self.error_collection.replace_one({'_id': watch_uuid}, record, upsert=True)
|
||||
|
||||
def get_error_text(self, watch_uuid):
    """Get error text for a watch.

    Args:
        watch_uuid (str): Watch UUID

    Returns:
        str or False: The error text, or False when none is stored
    """
    doc = self.error_collection.find_one({'_id': watch_uuid})
    return doc['error_text'] if doc else False
|
||||
|
||||
def save_xpath_data(self, watch_uuid, data, as_error=False):
    """Save XPath (visual-selector element) data for a watch.

    Args:
        watch_uuid (str): Watch UUID
        data (dict): XPath data
        as_error (bool): Whether this is error data
    """
    # Error data gets its own document ID so it never clobbers the live data.
    doc_id = f"{watch_uuid}:error" if as_error else watch_uuid

    # zlib-compress the JSON then base64-wrap it for BSON storage.
    packed = base64.b64encode(zlib.compress(json.dumps(data).encode())).decode('ascii')

    record = {
        '_id': doc_id,
        'watch_uuid': watch_uuid,
        'is_error': as_error,
        'data': packed,
        'timestamp': int(time.time()),
    }
    self.xpath_collection.replace_one({'_id': doc_id}, record, upsert=True)
|
||||
|
||||
def get_xpath_data(self, watch_uuid, is_error=False):
    """Get XPath (visual-selector element) data for a watch.

    Args:
        watch_uuid (str): Watch UUID
        is_error (bool): Whether to fetch the error data

    Returns:
        dict or None: The XPath data or None if not available
    """
    doc_id = f"{watch_uuid}:error" if is_error else watch_uuid

    doc = self.xpath_collection.find_one({'_id': doc_id})
    if doc is None:
        return None

    # Reverse the base64 + zlib packing applied by save_xpath_data().
    return json.loads(zlib.decompress(base64.b64decode(doc['data'])).decode('utf-8'))
|
||||
|
||||
def save_last_fetched_html(self, watch_uuid, timestamp, contents):
    """Save last fetched HTML for a watch.

    Args:
        watch_uuid (str): Watch UUID
        timestamp (int): Timestamp
        contents (str or bytes): HTML contents
    """
    contents_bytes = contents.encode('utf-8') if isinstance(contents, str) else contents

    # Track whether compression actually succeeded; previously the document
    # was always flagged 'compressed': True, so a failed compression stored
    # raw bytes that a later brotli.decompress() could not read.
    compressed_ok = True
    try:
        body = brotli.compress(contents_bytes)
    except Exception as e:
        logger.warning(f"{watch_uuid} - Unable to compress HTML snapshot: {str(e)}")
        body = contents_bytes
        compressed_ok = False

    doc_id = f"{watch_uuid}:{timestamp}"
    html_data = {
        '_id': doc_id,
        'watch_uuid': watch_uuid,
        'timestamp': timestamp,
        # Payload is always base64-wrapped for BSON storage, compressed or not.
        'html': base64.b64encode(body).decode('ascii'),
        'compressed': compressed_ok,
    }

    self.html_collection.replace_one({'_id': doc_id}, html_data, upsert=True)

    # Prune old snapshots - keep only the newest 2
    self._prune_last_fetched_html_snapshots(watch_uuid)
|
||||
|
||||
def _prune_last_fetched_html_snapshots(self, watch_uuid):
    """Prune old HTML snapshots, keeping only the two newest per watch.

    Args:
        watch_uuid (str): Watch UUID
    """
    # Only the IDs are needed to delete, so project everything else out and
    # skip the two newest entries instead of materialising full documents.
    stale = self.html_collection.find(
        {'watch_uuid': watch_uuid}, {'_id': 1}
    ).sort('timestamp', -1).skip(2)

    stale_ids = [doc['_id'] for doc in stale]
    if stale_ids:
        # One round-trip instead of a delete_one() per document.
        self.html_collection.delete_many({'_id': {'$in': stale_ids}})
|
||||
|
||||
def get_fetched_html(self, watch_uuid, timestamp):
    """Get fetched HTML for a watch.

    Args:
        watch_uuid (str): Watch UUID
        timestamp (int): Timestamp

    Returns:
        str or False: The HTML or False if not available
    """
    html_data = self.html_collection.find_one({'_id': f"{watch_uuid}:{timestamp}"})

    if not html_data:
        return False

    # The writer always base64-wraps the payload; 'compressed' only records
    # whether brotli was applied on top.  Previously the uncompressed branch
    # returned the stored base64 text verbatim instead of the HTML.
    raw = base64.b64decode(html_data['html'])
    if html_data.get('compressed', False):
        return brotli.decompress(raw).decode('utf-8')
    return raw.decode('utf-8')
|
||||
|
||||
def save_last_text_fetched_before_filters(self, watch_uuid, contents):
    """Save the last text fetched before filters were applied.

    Args:
        watch_uuid (str): Watch UUID
        contents (str): Text contents
    """
    # brotli text-mode compression, base64-wrapped for BSON storage.
    packed = base64.b64encode(
        brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT)
    ).decode('ascii')

    record = {
        '_id': watch_uuid,
        'watch_uuid': watch_uuid,
        'contents': packed,
        'timestamp': int(time.time()),
    }
    self.db['last_fetched'].replace_one({'_id': watch_uuid}, record, upsert=True)
|
||||
|
||||
def get_last_fetched_text_before_filters(self, watch_uuid):
    """Get the last text fetched before filters were applied.

    Args:
        watch_uuid (str): Watch UUID

    Returns:
        str: The text; falls back to the newest history snapshot, or ''
    """
    doc = self.db['last_fetched'].find_one({'_id': watch_uuid})

    if doc is None:
        # If a previous attempt doesnt yet exist, just snarf the previous snapshot instead
        dates = list(self.get_history(watch_uuid).keys())
        return self.get_history_snapshot(watch_uuid, dates[-1]) if dates else ''

    # Unwrap the base64 + brotli packing applied on save.
    return brotli.decompress(base64.b64decode(doc['contents'])).decode('utf-8')
|
||||
|
||||
def visualselector_data_is_ready(self, watch_uuid):
    """Check if visual selector data is ready.

    Args:
        watch_uuid (str): Watch UUID

    Returns:
        bool: True when both a screenshot and element (xpath) data exist
    """
    has_screenshot = self.db['screenshots'].find_one({'_id': watch_uuid}) is not None
    has_elements = self.xpath_collection.find_one({'_id': watch_uuid}) is not None
    return has_screenshot and has_elements
|
||||
|
||||
def clear_watch_history(self, watch_uuid):
    """Clear all stored history data for a watch.

    Args:
        watch_uuid (str): Watch UUID
    """
    selector = {'watch_uuid': watch_uuid}
    # Purge every per-watch collection in one pass.
    for collection in (
        self.snapshots_collection,
        self.history_collection,
        self.html_collection,
        self.db['last_fetched'],
        self.xpath_collection,
        self.db['screenshots'],
        self.error_collection,
    ):
        collection.delete_many(selector)
|
||||
|
||||
def delete_watch(self, watch_uuid):
    """Delete a watch and every artefact stored for it.

    Args:
        watch_uuid (str): Watch UUID
    """
    # History covers most collections; error screenshots are stored
    # separately and need an explicit purge.
    self.clear_watch_history(watch_uuid)
    self.db['error_screenshots'].delete_many({'watch_uuid': watch_uuid})
|
||||
525
changedetectionio/storage/s3_storage.py
Normal file
@@ -0,0 +1,525 @@
|
||||
import os
|
||||
import io
|
||||
import json
|
||||
import brotli
|
||||
import zlib
|
||||
import time
|
||||
from loguru import logger
|
||||
import boto3
|
||||
from urllib.parse import urlparse
|
||||
import base64
|
||||
|
||||
from .storage_base import StorageBase
|
||||
|
||||
class S3Storage(StorageBase):
    """Amazon S3 storage backend.

    Persists the application JSON, text snapshots, screenshots, element data
    and HTML captures as objects under ``s3://bucket/prefix/``; watch-specific
    objects live under ``{prefix}watches/{uuid}/``.
    """

    def __init__(self, datastore_path, include_default_watches=True, version_tag="0.0.0"):
        """Initialize the S3 storage backend

        Args:
            datastore_path (str): S3 URI (s3://bucket-name/optional-prefix)
            include_default_watches (bool): Whether to include default watches (unused here)
            version_tag (str): Version tag (unused here)
        """
        # Parse S3 URI into bucket and optional key prefix
        parsed_uri = urlparse(datastore_path)
        self.bucket_name = parsed_uri.netloc
        self.prefix = parsed_uri.path.lstrip('/')

        # Normalise the prefix so keys can be built by simple concatenation
        if self.prefix and not self.prefix.endswith('/'):
            self.prefix += '/'

        # Initialize S3 client
        # Uses AWS credentials from environment variables or IAM role
        self.s3 = boto3.client('s3')

        logger.info(f"S3 storage initialized, using bucket '{self.bucket_name}' with prefix '{self.prefix}'")

    def _get_key(self, path):
        """Return the full S3 key for a path relative to the prefix.

        Args:
            path (str): Path relative to the prefix

        Returns:
            str: The full S3 key
        """
        return f"{self.prefix}{path}"

    def load_data(self):
        """Load the application JSON from S3.

        Returns:
            dict or None: The loaded data, or None when it does not exist yet
        """
        key = self._get_key("app-data.json")

        try:
            response = self.s3.get_object(Bucket=self.bucket_name, Key=key)
            return json.loads(response['Body'].read().decode('utf-8'))
        except self.s3.exceptions.NoSuchKey:
            # First run — no data stored yet
            return None
        except Exception as e:
            logger.error(f"Error loading data from S3: {str(e)}")
            raise e

    def save_data(self, data):
        """Save the application JSON to S3.

        Args:
            data (dict): The data to save
        """
        try:
            key = self._get_key("app-data.json")
            self.s3.put_object(
                Bucket=self.bucket_name,
                Key=key,
                Body=json.dumps(data, indent=4),
                ContentType='application/json'
            )
        except Exception as e:
            logger.error(f"Error saving data to S3: {str(e)}")
            raise e

    def ensure_data_dir_exists(self, watch_uuid):
        """No-op: S3 has no directories.

        Args:
            watch_uuid (str): Watch UUID (unused)
        """
        pass

    def _get_watch_prefix(self, watch_uuid):
        """Return the S3 key prefix under which a watch's objects are stored.

        Args:
            watch_uuid (str): Watch UUID

        Returns:
            str: The watch prefix (ends with '/')
        """
        return self._get_key(f"watches/{watch_uuid}/")

    def save_history_text(self, watch_uuid, contents, timestamp, snapshot_id):
        """Save history text to S3 and append it to the history index.

        Args:
            watch_uuid (str): Watch UUID
            contents (str): Contents to save
            timestamp (int): Timestamp
            snapshot_id (str): Snapshot ID

        Returns:
            str: Snapshot filename as recorded in the history index
        """
        # Small snapshots are stored uncompressed; large ones are brotli'd
        # unless compression is disabled via the environment.
        threshold = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024))
        skip_brotli = os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False').lower() in ('true', '1', 't')

        watch_prefix = self._get_watch_prefix(watch_uuid)

        if not skip_brotli and len(contents) > threshold:
            snapshot_fname = f"{snapshot_id}.txt.br"
            body = brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT)
        else:
            snapshot_fname = f"{snapshot_id}.txt"
            body = contents.encode('utf-8')

        self.s3.put_object(
            Bucket=self.bucket_name,
            Key=f"{watch_prefix}snapshots/{snapshot_fname}",
            Body=body
        )

        # Append a "timestamp,filename" line to the history index
        history_key = f"{watch_prefix}history.txt"
        try:
            response = self.s3.get_object(Bucket=self.bucket_name, Key=history_key)
            history_content = response['Body'].read().decode('utf-8')
        except self.s3.exceptions.NoSuchKey:
            history_content = ""

        history_content += f"{timestamp},{snapshot_fname}\n"

        self.s3.put_object(
            Bucket=self.bucket_name,
            Key=history_key,
            Body=history_content
        )

        return snapshot_fname

    def get_history(self, watch_uuid):
        """Get history for a watch.

        Args:
            watch_uuid (str): Watch UUID

        Returns:
            dict: Timestamps (as strings) mapped to full S3 snapshot keys
        """
        tmp_history = {}
        watch_prefix = self._get_watch_prefix(watch_uuid)
        history_key = f"{watch_prefix}history.txt"

        try:
            response = self.s3.get_object(Bucket=self.bucket_name, Key=history_key)
            history_content = response['Body'].read().decode('utf-8')

            for line in history_content.splitlines():
                if ',' in line:
                    # maxsplit=1: only the first comma separates timestamp
                    # from filename (a comma inside the filename would
                    # otherwise make the 2-way unpack raise ValueError).
                    k, v = line.strip().split(',', 1)
                    tmp_history[k] = f"{watch_prefix}snapshots/{v}"

            return tmp_history
        except self.s3.exceptions.NoSuchKey:
            return {}

    def get_history_snapshot(self, watch_uuid, timestamp):
        """Get a history snapshot from S3.

        Args:
            watch_uuid (str): Watch UUID
            timestamp (int): Timestamp (matched against history index keys,
                which are strings — callers pass keys from get_history())

        Returns:
            str or None: The snapshot content
        """
        history = self.get_history(watch_uuid)
        if timestamp not in history:
            return None

        key = history[timestamp]

        try:
            response = self.s3.get_object(Bucket=self.bucket_name, Key=key)
            content = response['Body'].read()

            # The '.br' suffix records whether the snapshot was compressed
            if key.endswith('.br'):
                return brotli.decompress(content).decode('utf-8')
            return content.decode('utf-8')
        except Exception as e:
            logger.error(f"Error reading snapshot from S3: {str(e)}")
            return None

    def save_screenshot(self, watch_uuid, screenshot, as_error=False):
        """Save a screenshot for a watch.

        Args:
            watch_uuid (str): Watch UUID
            screenshot (bytes): Screenshot data
            as_error (bool): Whether this is an error screenshot
        """
        watch_prefix = self._get_watch_prefix(watch_uuid)

        if as_error:
            key = f"{watch_prefix}last-error-screenshot.png"
        else:
            key = f"{watch_prefix}last-screenshot.png"

        self.s3.put_object(
            Bucket=self.bucket_name,
            Key=key,
            Body=screenshot,
            ContentType='image/png'
        )

    def get_screenshot(self, watch_uuid, is_error=False):
        """Get a screenshot for a watch.

        Args:
            watch_uuid (str): Watch UUID
            is_error (bool): Whether to get the error screenshot

        Returns:
            bytes or None: The screenshot data or None if not available
        """
        watch_prefix = self._get_watch_prefix(watch_uuid)

        if is_error:
            key = f"{watch_prefix}last-error-screenshot.png"
        else:
            key = f"{watch_prefix}last-screenshot.png"

        try:
            response = self.s3.get_object(Bucket=self.bucket_name, Key=key)
            return response['Body'].read()
        except self.s3.exceptions.NoSuchKey:
            return None

    def save_error_text(self, watch_uuid, contents):
        """Save error text for a watch.

        Args:
            watch_uuid (str): Watch UUID
            contents (str): Error contents
        """
        watch_prefix = self._get_watch_prefix(watch_uuid)
        key = f"{watch_prefix}last-error.txt"

        self.s3.put_object(
            Bucket=self.bucket_name,
            Key=key,
            Body=contents.encode('utf-8')
        )

    def get_error_text(self, watch_uuid):
        """Get error text for a watch.

        Args:
            watch_uuid (str): Watch UUID

        Returns:
            str or False: The error text or False if not available
        """
        watch_prefix = self._get_watch_prefix(watch_uuid)
        key = f"{watch_prefix}last-error.txt"

        try:
            response = self.s3.get_object(Bucket=self.bucket_name, Key=key)
            return response['Body'].read().decode('utf-8')
        except self.s3.exceptions.NoSuchKey:
            return False

    def save_xpath_data(self, watch_uuid, data, as_error=False):
        """Save XPath (visual-selector element) data for a watch.

        Args:
            watch_uuid (str): Watch UUID
            data (dict): XPath data
            as_error (bool): Whether this is error data
        """
        watch_prefix = self._get_watch_prefix(watch_uuid)

        if as_error:
            key = f"{watch_prefix}elements-error.deflate"
        else:
            key = f"{watch_prefix}elements.deflate"

        # zlib-compressed JSON, matching the '.deflate' suffix
        compressed_data = zlib.compress(json.dumps(data).encode())

        self.s3.put_object(
            Bucket=self.bucket_name,
            Key=key,
            Body=compressed_data
        )

    def get_xpath_data(self, watch_uuid, is_error=False):
        """Get XPath (visual-selector element) data for a watch.

        Args:
            watch_uuid (str): Watch UUID
            is_error (bool): Whether to get error data

        Returns:
            dict or None: The XPath data or None if not available
        """
        watch_prefix = self._get_watch_prefix(watch_uuid)

        if is_error:
            key = f"{watch_prefix}elements-error.deflate"
        else:
            key = f"{watch_prefix}elements.deflate"

        try:
            response = self.s3.get_object(Bucket=self.bucket_name, Key=key)
            compressed_data = response['Body'].read()
            return json.loads(zlib.decompress(compressed_data).decode('utf-8'))
        except self.s3.exceptions.NoSuchKey:
            return None

    def save_last_fetched_html(self, watch_uuid, timestamp, contents):
        """Save last fetched HTML for a watch.

        Args:
            watch_uuid (str): Watch UUID
            timestamp (int): Timestamp
            contents (str or bytes): HTML contents
        """
        watch_prefix = self._get_watch_prefix(watch_uuid)
        key = f"{watch_prefix}html/{timestamp}.html.br"

        contents_bytes = contents.encode('utf-8') if isinstance(contents, str) else contents
        try:
            compressed_contents = brotli.compress(contents_bytes)
        except Exception as e:
            # Best effort: store raw bytes when compression fails.
            # get_fetched_html() falls back to a plain decode for this case.
            logger.warning(f"{watch_uuid} - Unable to compress HTML snapshot: {str(e)}")
            compressed_contents = contents_bytes

        self.s3.put_object(
            Bucket=self.bucket_name,
            Key=key,
            Body=compressed_contents
        )

        # Prune old snapshots - keep only the newest 2
        self._prune_last_fetched_html_snapshots(watch_uuid)

    def _prune_last_fetched_html_snapshots(self, watch_uuid):
        """Prune old HTML snapshots, keeping only the two newest.

        Args:
            watch_uuid (str): Watch UUID
        """
        watch_prefix = self._get_watch_prefix(watch_uuid)
        html_prefix = f"{watch_prefix}html/"

        response = self.s3.list_objects_v2(
            Bucket=self.bucket_name,
            Prefix=html_prefix
        )

        if 'Contents' not in response:
            return

        # Keys are "{timestamp}.html.br" — sort newest-first by the numeric
        # timestamp embedded in the filename.
        html_files = sorted(
            response['Contents'],
            key=lambda x: int(x['Key'].split('/')[-1].split('.')[0]),
            reverse=True
        )

        for file in html_files[2:]:
            self.s3.delete_object(
                Bucket=self.bucket_name,
                Key=file['Key']
            )

    def get_fetched_html(self, watch_uuid, timestamp):
        """Get fetched HTML for a watch.

        Args:
            watch_uuid (str): Watch UUID
            timestamp (int): Timestamp

        Returns:
            str or False: The HTML or False if not available
        """
        watch_prefix = self._get_watch_prefix(watch_uuid)
        key = f"{watch_prefix}html/{timestamp}.html.br"

        try:
            response = self.s3.get_object(Bucket=self.bucket_name, Key=key)
            raw = response['Body'].read()
        except self.s3.exceptions.NoSuchKey:
            return False

        try:
            return brotli.decompress(raw).decode('utf-8')
        except Exception:
            # save_last_fetched_html() stores raw bytes under the same key
            # when compression fails — fall back to a plain decode instead
            # of propagating a decompression error.
            return raw.decode('utf-8')

    def save_last_text_fetched_before_filters(self, watch_uuid, contents):
        """Save the last text fetched before filters were applied.

        Args:
            watch_uuid (str): Watch UUID
            contents (str): Text contents
        """
        watch_prefix = self._get_watch_prefix(watch_uuid)
        key = f"{watch_prefix}last-fetched.br"

        compressed_contents = brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT)

        self.s3.put_object(
            Bucket=self.bucket_name,
            Key=key,
            Body=compressed_contents
        )

    def get_last_fetched_text_before_filters(self, watch_uuid):
        """Get the last text fetched before filters were applied.

        Args:
            watch_uuid (str): Watch UUID

        Returns:
            str: The text; falls back to the newest history snapshot, or ''
        """
        watch_prefix = self._get_watch_prefix(watch_uuid)
        key = f"{watch_prefix}last-fetched.br"

        try:
            response = self.s3.get_object(Bucket=self.bucket_name, Key=key)
            compressed_data = response['Body'].read()
            return brotli.decompress(compressed_data).decode('utf-8')
        except self.s3.exceptions.NoSuchKey:
            # If a previous attempt doesnt yet exist, just snarf the previous snapshot instead
            history = self.get_history(watch_uuid)
            dates = list(history.keys())

            if len(dates):
                return self.get_history_snapshot(watch_uuid, dates[-1])
            return ''

    def visualselector_data_is_ready(self, watch_uuid):
        """Check if visual selector data is ready.

        Args:
            watch_uuid (str): Watch UUID

        Returns:
            bool: True when both the screenshot and element data objects exist
        """
        watch_prefix = self._get_watch_prefix(watch_uuid)
        screenshot_key = f"{watch_prefix}last-screenshot.png"
        elements_key = f"{watch_prefix}elements.deflate"

        try:
            # head_object raises ClientError when either object is missing
            self.s3.head_object(Bucket=self.bucket_name, Key=screenshot_key)
            self.s3.head_object(Bucket=self.bucket_name, Key=elements_key)
            return True
        except self.s3.exceptions.ClientError:
            return False

    def clear_watch_history(self, watch_uuid):
        """Clear history for a watch by deleting every object under its prefix.

        Args:
            watch_uuid (str): Watch UUID
        """
        watch_prefix = self._get_watch_prefix(watch_uuid)

        # Paginate because a watch can hold more than one listing page
        paginator = self.s3.get_paginator('list_objects_v2')
        pages = paginator.paginate(
            Bucket=self.bucket_name,
            Prefix=watch_prefix
        )

        # Delete all objects in batches
        for page in pages:
            if 'Contents' not in page:
                continue

            delete_keys = {'Objects': [{'Key': obj['Key']} for obj in page['Contents']]}
            self.s3.delete_objects(
                Bucket=self.bucket_name,
                Delete=delete_keys
            )

    def delete_watch(self, watch_uuid):
        """Delete a watch.

        Args:
            watch_uuid (str): Watch UUID
        """
        # Same implementation as clear_watch_history for S3 — everything a
        # watch owns lives under its prefix.
        self.clear_watch_history(watch_uuid)
|
||||
230
changedetectionio/storage/storage_base.py
Normal file
@@ -0,0 +1,230 @@
|
||||
from abc import ABC, abstractmethod
|
||||
import json
|
||||
from loguru import logger
|
||||
|
||||
class StorageBase(ABC):
    """Abstract base class for storage backends.

    Concrete implementations (filesystem, MongoDB, S3) persist the
    application data plus per-watch artefacts: text snapshots, a history
    index, screenshots, error text, element (XPath) data and raw HTML.
    """

    @abstractmethod
    def __init__(self, datastore_path, include_default_watches=True, version_tag="0.0.0"):
        """Initialize the storage backend

        Args:
            datastore_path (str): Path or URI identifying the datastore
                (plain directory, mongodb://..., s3://...)
            include_default_watches (bool): Whether to include default watches
            version_tag (str): Version tag
        """
        pass

    @abstractmethod
    def load_data(self):
        """Load data from the storage backend

        Returns:
            dict: The loaded data (or None when nothing is stored yet)
        """
        pass

    @abstractmethod
    def save_data(self, data):
        """Save data to the storage backend

        Args:
            data (dict): The data to save
        """
        pass

    @abstractmethod
    def save_history_text(self, watch_uuid, contents, timestamp, snapshot_id):
        """Save history text to the storage backend

        Args:
            watch_uuid (str): Watch UUID
            contents (str): Contents to save
            timestamp (int): Timestamp
            snapshot_id (str): Snapshot ID

        Returns:
            str: Snapshot filename or ID (backend-specific; it is the value
                later exposed through get_history())
        """
        pass

    @abstractmethod
    def get_history_snapshot(self, watch_uuid, timestamp):
        """Get a history snapshot from the storage backend

        Args:
            watch_uuid (str): Watch UUID
            timestamp (int): Timestamp (typically a key from get_history())

        Returns:
            str: The snapshot content
        """
        pass

    @abstractmethod
    def get_history(self, watch_uuid):
        """Get history for a watch

        Args:
            watch_uuid (str): Watch UUID

        Returns:
            dict: The history with timestamp keys (strings) and snapshot
                IDs as values
        """
        pass

    @abstractmethod
    def save_screenshot(self, watch_uuid, screenshot, as_error=False):
        """Save a screenshot for a watch

        Args:
            watch_uuid (str): Watch UUID
            screenshot (bytes): Screenshot data
            as_error (bool): Whether this is an error screenshot
        """
        pass

    @abstractmethod
    def get_screenshot(self, watch_uuid, is_error=False):
        """Get a screenshot for a watch

        Args:
            watch_uuid (str): Watch UUID
            is_error (bool): Whether to get the error screenshot

        Returns:
            str or None: The screenshot path or None if not available
                (NOTE(review): some backends appear to return the raw
                screenshot bytes instead of a path — confirm and unify)
        """
        pass

    @abstractmethod
    def save_error_text(self, watch_uuid, contents):
        """Save error text for a watch

        Args:
            watch_uuid (str): Watch UUID
            contents (str): Error contents
        """
        pass

    @abstractmethod
    def get_error_text(self, watch_uuid):
        """Get error text for a watch

        Args:
            watch_uuid (str): Watch UUID

        Returns:
            str or False: The error text or False if not available
        """
        pass

    @abstractmethod
    def save_xpath_data(self, watch_uuid, data, as_error=False):
        """Save XPath (visual-selector element) data for a watch

        Args:
            watch_uuid (str): Watch UUID
            data (dict): XPath data
            as_error (bool): Whether this is error data
        """
        pass

    @abstractmethod
    def get_xpath_data(self, watch_uuid, is_error=False):
        """Get XPath (visual-selector element) data for a watch

        Args:
            watch_uuid (str): Watch UUID
            is_error (bool): Whether to get error data

        Returns:
            dict or None: The XPath data or None if not available
        """
        pass

    @abstractmethod
    def save_last_fetched_html(self, watch_uuid, timestamp, contents):
        """Save last fetched HTML for a watch

        Args:
            watch_uuid (str): Watch UUID
            timestamp (int): Timestamp
            contents (str): HTML contents
        """
        pass

    @abstractmethod
    def get_fetched_html(self, watch_uuid, timestamp):
        """Get fetched HTML for a watch

        Args:
            watch_uuid (str): Watch UUID
            timestamp (int): Timestamp

        Returns:
            str or False: The HTML or False if not available
        """
        pass

    @abstractmethod
    def save_last_text_fetched_before_filters(self, watch_uuid, contents):
        """Save the last text fetched before filters were applied

        Args:
            watch_uuid (str): Watch UUID
            contents (str): Text contents
        """
        pass

    @abstractmethod
    def get_last_fetched_text_before_filters(self, watch_uuid):
        """Get the last text fetched before filters were applied

        Args:
            watch_uuid (str): Watch UUID

        Returns:
            str: The text
        """
        pass

    @abstractmethod
    def ensure_data_dir_exists(self, watch_uuid):
        """Ensure the data directory exists for a watch
        (a no-op for backends without a directory concept)

        Args:
            watch_uuid (str): Watch UUID
        """
        pass

    @abstractmethod
    def visualselector_data_is_ready(self, watch_uuid):
        """Check if visual selector data is ready

        Args:
            watch_uuid (str): Watch UUID

        Returns:
            bool: Whether visual selector data is ready
        """
        pass

    @abstractmethod
    def clear_watch_history(self, watch_uuid):
        """Clear history for a watch

        Args:
            watch_uuid (str): Watch UUID
        """
        pass

    @abstractmethod
    def delete_watch(self, watch_uuid):
        """Delete a watch and all of its stored artefacts

        Args:
            watch_uuid (str): Watch UUID
        """
        pass
|
||||
33
changedetectionio/storage/storage_factory.py
Normal file
@@ -0,0 +1,33 @@
|
||||
import re
|
||||
from loguru import logger
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from .storage_base import StorageBase
|
||||
from .filesystem_storage import FileSystemStorage
|
||||
from .mongodb_storage import MongoDBStorage
|
||||
from .s3_storage import S3Storage
|
||||
|
||||
def create_storage(datastore_path, include_default_watches=True, version_tag="0.0.0"):
|
||||
"""Create a storage backend based on the datastore path
|
||||
|
||||
Args:
|
||||
datastore_path (str): Path to the datastore
|
||||
include_default_watches (bool): Whether to include default watches
|
||||
version_tag (str): Version tag
|
||||
|
||||
Returns:
|
||||
StorageBase: The storage backend
|
||||
"""
|
||||
# Check if it's a MongoDB URI
|
||||
if datastore_path.startswith('mongodb://') or datastore_path.startswith('mongodb+srv://'):
|
||||
logger.info(f"Using MongoDB storage backend with URI {datastore_path}")
|
||||
return MongoDBStorage(datastore_path, include_default_watches, version_tag)
|
||||
|
||||
# Check if it's an S3 URI
|
||||
if datastore_path.startswith('s3://'):
|
||||
logger.info(f"Using S3 storage backend with URI {datastore_path}")
|
||||
return S3Storage(datastore_path, include_default_watches, version_tag)
|
||||
|
||||
# Default to filesystem
|
||||
logger.info(f"Using filesystem storage backend with path {datastore_path}")
|
||||
return FileSystemStorage(datastore_path, include_default_watches, version_tag)
|
||||
@@ -20,6 +20,7 @@ from loguru import logger
|
||||
|
||||
from .processors import get_custom_watch_obj_for_processor
|
||||
from .processors.restock_diff import Restock
|
||||
from .storage.storage_factory import create_storage
|
||||
|
||||
# Because the server will run as a daemon and wont know the URL for notification links when firing off a notification
|
||||
BASE_URL_NOT_SET_TEXT = '("Base URL" not set - see settings - notifications)'
|
||||
@@ -38,20 +39,28 @@ class ChangeDetectionStore:
|
||||
needs_write_urgent = False
|
||||
|
||||
__version_check = True
|
||||
|
||||
# Singleton instance for access from Watch class methods
|
||||
instance = None
|
||||
|
||||
def __init__(self, datastore_path="/datastore", include_default_watches=True, version_tag="0.0.0"):
|
||||
# Should only be active for docker
|
||||
# logging.basicConfig(filename='/dev/stdout', level=logging.INFO)
|
||||
self.__data = App.model()
|
||||
self.datastore_path = datastore_path
|
||||
self.json_store_path = "{}/url-watches.json".format(self.datastore_path)
|
||||
logger.info(f"Datastore path is '{self.json_store_path}'")
|
||||
|
||||
# Create the appropriate storage backend based on the datastore path
|
||||
self.storage = create_storage(datastore_path, include_default_watches, version_tag)
|
||||
|
||||
self.needs_write = False
|
||||
self.start_time = time.time()
|
||||
self.stop_thread = False
|
||||
# Base definition for all watchers
|
||||
# deepcopy part of #569 - not sure why its needed exactly
|
||||
self.generic_definition = deepcopy(Watch.model(datastore_path = datastore_path, default={}))
|
||||
self.generic_definition = deepcopy(Watch.model(datastore_path=datastore_path, default={}))
|
||||
|
||||
# Set singleton instance
|
||||
ChangeDetectionStore.instance = self
|
||||
|
||||
if path.isfile('changedetectionio/source.txt'):
|
||||
with open('changedetectionio/source.txt') as f:
|
||||
@@ -60,10 +69,9 @@ class ChangeDetectionStore:
|
||||
self.__data['build_sha'] = f.read()
|
||||
|
||||
try:
|
||||
# @todo retest with ", encoding='utf-8'"
|
||||
with open(self.json_store_path) as json_file:
|
||||
from_disk = json.load(json_file)
|
||||
|
||||
# Load data from storage
|
||||
from_disk = self.storage.load_data()
|
||||
if from_disk:
|
||||
# @todo isnt there a way todo this dict.update recursively?
|
||||
# Problem here is if the one on the disk is missing a sub-struct, it wont be present anymore.
|
||||
if 'watching' in from_disk:
|
||||
@@ -91,22 +99,24 @@ class ChangeDetectionStore:
|
||||
for uuid, tag in self.__data['settings']['application']['tags'].items():
|
||||
self.__data['settings']['application']['tags'][uuid] = self.rehydrate_entity(uuid, tag, processor_override='restock_diff')
|
||||
logger.info(f"Tag: {uuid} {tag['title']}")
|
||||
else:
|
||||
# First time ran, Create the datastore.
|
||||
if include_default_watches:
|
||||
logger.critical(f"No data store found, creating new store")
|
||||
self.add_watch(url='https://news.ycombinator.com/',
|
||||
tag='Tech news',
|
||||
extras={'fetch_backend': 'html_requests'})
|
||||
|
||||
# First time ran, Create the datastore.
|
||||
except (FileNotFoundError):
|
||||
if include_default_watches:
|
||||
logger.critical(f"No JSON DB found at {self.json_store_path}, creating JSON store at {self.datastore_path}")
|
||||
self.add_watch(url='https://news.ycombinator.com/',
|
||||
tag='Tech news',
|
||||
extras={'fetch_backend': 'html_requests'})
|
||||
self.add_watch(url='https://changedetection.io/CHANGELOG.txt',
|
||||
tag='changedetection.io',
|
||||
extras={'fetch_backend': 'html_requests'})
|
||||
|
||||
self.add_watch(url='https://changedetection.io/CHANGELOG.txt',
|
||||
tag='changedetection.io',
|
||||
extras={'fetch_backend': 'html_requests'})
|
||||
|
||||
updates_available = self.get_updates_available()
|
||||
self.__data['settings']['application']['schema_version'] = updates_available.pop()
|
||||
updates_available = self.get_updates_available()
|
||||
self.__data['settings']['application']['schema_version'] = updates_available.pop()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error loading data from storage: {str(e)}")
|
||||
raise e
|
||||
else:
|
||||
# Bump the update version by running updates
|
||||
self.run_updates()
|
||||
@@ -227,23 +237,15 @@ class ChangeDetectionStore:
|
||||
|
||||
# Delete a single watch by UUID
|
||||
def delete(self, uuid):
|
||||
import pathlib
|
||||
import shutil
|
||||
|
||||
with self.lock:
|
||||
if uuid == 'all':
|
||||
# Delete all watches
|
||||
for watch_uuid in list(self.data['watching'].keys()):
|
||||
self.storage.delete_watch(watch_uuid)
|
||||
self.__data['watching'] = {}
|
||||
|
||||
# GitHub #30 also delete history records
|
||||
for uuid in self.data['watching']:
|
||||
path = pathlib.Path(os.path.join(self.datastore_path, uuid))
|
||||
if os.path.exists(path):
|
||||
shutil.rmtree(path)
|
||||
|
||||
else:
|
||||
path = pathlib.Path(os.path.join(self.datastore_path, uuid))
|
||||
if os.path.exists(path):
|
||||
shutil.rmtree(path)
|
||||
# Delete a single watch
|
||||
self.storage.delete_watch(uuid)
|
||||
del self.data['watching'][uuid]
|
||||
|
||||
self.needs_write_urgent = True
|
||||
@@ -266,6 +268,7 @@ class ChangeDetectionStore:
|
||||
|
||||
# Remove a watchs data but keep the entry (URL etc)
|
||||
def clear_watch_history(self, uuid):
|
||||
self.storage.clear_watch_history(uuid)
|
||||
self.__data['watching'][uuid].clear_watch()
|
||||
self.needs_write_urgent = True
|
||||
|
||||
@@ -372,43 +375,30 @@ class ChangeDetectionStore:
|
||||
return new_uuid
|
||||
|
||||
def visualselector_data_is_ready(self, watch_uuid):
|
||||
output_path = "{}/{}".format(self.datastore_path, watch_uuid)
|
||||
screenshot_filename = "{}/last-screenshot.png".format(output_path)
|
||||
elements_index_filename = "{}/elements.deflate".format(output_path)
|
||||
if path.isfile(screenshot_filename) and path.isfile(elements_index_filename) :
|
||||
return True
|
||||
|
||||
return False
|
||||
return self.storage.visualselector_data_is_ready(watch_uuid)
|
||||
|
||||
def sync_to_json(self):
|
||||
logger.info("Saving JSON..")
|
||||
logger.info("Saving data to storage backend...")
|
||||
try:
|
||||
data = deepcopy(self.__data)
|
||||
except RuntimeError as e:
|
||||
# Try again in 15 seconds
|
||||
time.sleep(15)
|
||||
logger.error(f"! Data changed when writing to JSON, trying again.. {str(e)}")
|
||||
logger.error(f"! Data changed when writing to storage, trying again.. {str(e)}")
|
||||
self.sync_to_json()
|
||||
return
|
||||
else:
|
||||
|
||||
try:
|
||||
# Re #286 - First write to a temp file, then confirm it looks OK and rename it
|
||||
# This is a fairly basic strategy to deal with the case that the file is corrupted,
|
||||
# system was out of memory, out of RAM etc
|
||||
with open(self.json_store_path+".tmp", 'w') as json_file:
|
||||
json.dump(data, json_file, indent=4)
|
||||
os.replace(self.json_store_path+".tmp", self.json_store_path)
|
||||
self.storage.save_data(data)
|
||||
except Exception as e:
|
||||
logger.error(f"Error writing JSON!! (Main JSON file save was skipped) : {str(e)}")
|
||||
logger.error(f"Error writing to storage backend: {str(e)}")
|
||||
|
||||
self.needs_write = False
|
||||
self.needs_write_urgent = False
|
||||
|
||||
# Thread runner, this helps with thread/write issues when there are many operations that want to update the JSON
|
||||
# Thread runner, this helps with thread/write issues when there are many operations that want to update the data
|
||||
# by just running periodically in one thread, according to python, dict updates are threadsafe.
|
||||
def save_datastore(self):
|
||||
|
||||
while True:
|
||||
if self.stop_thread:
|
||||
# Suppressing "Logging error in Loguru Handler #0" during CICD.
|
||||
|
||||
@@ -12,13 +12,13 @@
|
||||
}}
|
||||
<div class="pure-form-message-inline">
|
||||
<p>
|
||||
<strong>Tip:</strong> Use <a target=_new href="https://github.com/caronc/apprise">AppRise Notification URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.<br>
|
||||
<strong>Tip:</strong> Use <a target="newwindow" href="https://github.com/caronc/apprise">AppRise Notification URLs</a> for notification to just about any service! <i><a target="newwindow" href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.<br>
|
||||
</p>
|
||||
<div data-target="#advanced-help-notifications" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div>
|
||||
<ul style="display: none" id="advanced-help-notifications">
|
||||
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_discord">discord://</a></code> (or <code>https://discord.com/api/webhooks...</code>)) only supports a maximum <strong>2,000 characters</strong> of notification text, including the title.</li>
|
||||
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> bots can't send messages to other bots, so you should specify chat ID of non-bot user.</li>
|
||||
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> only supports very limited HTML and can fail when extra tags are sent, <a href="https://core.telegram.org/bots/api#html-style">read more here</a> (or use plaintext/markdown format)</li>
|
||||
<li><code><a target="newwindow" href="https://github.com/caronc/apprise/wiki/Notify_discord">discord://</a></code> (or <code>https://discord.com/api/webhooks...</code>)) only supports a maximum <strong>2,000 characters</strong> of notification text, including the title.</li>
|
||||
<li><code><a target="newwindow" href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> bots can't send messages to other bots, so you should specify chat ID of non-bot user.</li>
|
||||
<li><code><a target="newwindow" href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> only supports very limited HTML and can fail when extra tags are sent, <a href="https://core.telegram.org/bots/api#html-style">read more here</a> (or use plaintext/markdown format)</li>
|
||||
<li><code>gets://</code>, <code>posts://</code>, <code>puts://</code>, <code>deletes://</code> for direct API calls (or omit the "<code>s</code>" for non-SSL ie <code>get://</code>) <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes#postposts">more help here</a></li>
|
||||
<li>Accepts the <code>{{ '{{token}}' }}</code> placeholders listed below</li>
|
||||
</ul>
|
||||
@@ -40,7 +40,7 @@
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.notification_body , rows=5, class="notification-body", placeholder=settings_application['notification_body']) }}
|
||||
<span class="pure-form-message-inline">Body for all notifications ‐ You can use <a target="_new" href="https://jinja.palletsprojects.com/en/3.0.x/templates/">Jinja2</a> templating in the notification title, body and URL, and tokens from below.
|
||||
<span class="pure-form-message-inline">Body for all notifications ‐ You can use <a target="newwindow" href="https://jinja.palletsprojects.com/en/3.0.x/templates/">Jinja2</a> templating in the notification title, body and URL, and tokens from below.
|
||||
</span>
|
||||
|
||||
</div>
|
||||
@@ -126,7 +126,7 @@
|
||||
<div class="pure-form-message-inline">
|
||||
<p>
|
||||
Warning: Contents of <code>{{ '{{diff}}' }}</code>, <code>{{ '{{diff_removed}}' }}</code>, and <code>{{ '{{diff_added}}' }}</code> depend on how the difference algorithm perceives the change. <br>
|
||||
For example, an addition or removal could be perceived as a change in some cases. <a target="_new" href="https://github.com/dgtlmoon/changedetection.io/wiki/Using-the-%7B%7Bdiff%7D%7D,-%7B%7Bdiff_added%7D%7D,-and-%7B%7Bdiff_removed%7D%7D-notification-tokens">More Here</a> <br>
|
||||
For example, an addition or removal could be perceived as a change in some cases. <a target="newwindow" href="https://github.com/dgtlmoon/changedetection.io/wiki/Using-the-%7B%7Bdiff%7D%7D,-%7B%7Bdiff_added%7D%7D,-and-%7B%7Bdiff_removed%7D%7D-notification-tokens">More Here</a> <br>
|
||||
</p>
|
||||
<p>
|
||||
For JSON payloads, use <strong>|tojson</strong> without quotes for automatic escaping, for example - <code>{ "name": {{ '{{ watch_title|tojson }}' }} }</code>
|
||||
|
||||
@@ -61,6 +61,18 @@
|
||||
{{ field(**kwargs)|safe }}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro playwright_warning() %}
|
||||
<p><strong>Error - Playwright support for Chrome based fetching is not enabled.</strong> Alternatively try our <a href="https://changedetection.io">very affordable subscription based service which has all this setup for you</a>.</p>
|
||||
<p>You may need to <a href="https://github.com/dgtlmoon/changedetection.io/blob/09ebc6ec6338545bdd694dc6eee57f2e9d2b8075/docker-compose.yml#L31">Enable playwright environment variable</a> and uncomment the <strong>sockpuppetbrowser</strong> in the <a href="https://github.com/dgtlmoon/changedetection.io/blob/master/docker-compose.yml">docker-compose.yml</a> file.</p>
|
||||
<br>
|
||||
<p>(Also Selenium/WebDriver can not extract full page screenshots reliably so Playwright is recommended here)</p>
|
||||
|
||||
{% endmacro %}
|
||||
|
||||
{% macro only_webdriver_type_watches_warning() %}
|
||||
<p><strong>Sorry, this functionality only works with Playwright/Chrome enabled watches.<br>You need to <a href="#request">Set the fetch method to Playwright/Chrome mode and resave</a> and have the Playwright connection enabled.</strong></p><br>
|
||||
{% endmacro %}
|
||||
|
||||
{% macro render_time_schedule_form(form, available_timezones, timezone_default_config) %}
|
||||
<style>
|
||||
.day-schedule *, .day-schedule select {
|
||||
|
||||
@@ -159,7 +159,7 @@
|
||||
<a id="chrome-extension-link"
|
||||
title="Try our new Chrome Extension!"
|
||||
href="https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop">
|
||||
<img src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}">
|
||||
<img alt="Chrome store icon" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}">
|
||||
Chrome Webstore
|
||||
</a>
|
||||
</p>
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{% extends 'base.html' %}
|
||||
{% block content %}
|
||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form %}
|
||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_webdriver_type_watches_warning %}
|
||||
{% from '_common_fields.html' import render_common_settings_form %}
|
||||
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script>
|
||||
@@ -40,14 +40,13 @@
|
||||
|
||||
<div class="tabs collapsable">
|
||||
<ul>
|
||||
<li class="tab" id=""><a href="#general">General</a></li>
|
||||
<li class="tab"><a href="#general">General</a></li>
|
||||
<li class="tab"><a href="#request">Request</a></li>
|
||||
{% if extra_tab_content %}
|
||||
<li class="tab"><a href="#extras_tab">{{ extra_tab_content }}</a></li>
|
||||
{% endif %}
|
||||
{% if playwright_enabled %}
|
||||
<li class="tab"><a id="browsersteps-tab" href="#browser-steps">Browser Steps</a></li>
|
||||
{% endif %}
|
||||
<!-- should goto extra forms? -->
|
||||
{% if watch['processor'] == 'text_json_diff' %}
|
||||
<li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li>
|
||||
<li class="tab" id="filters-and-triggers-tab"><a href="#filters-and-triggers">Filters & Triggers</a></li>
|
||||
@@ -199,8 +198,9 @@ Math: {{ 1 + 1 }}") }}
|
||||
</div>
|
||||
</fieldset>
|
||||
</div>
|
||||
{% if playwright_enabled %}
|
||||
|
||||
<div class="tab-pane-inner" id="browser-steps">
|
||||
{% if playwright_enabled and watch_uses_webdriver %}
|
||||
<img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}" alt="New beta functionality">
|
||||
<fieldset>
|
||||
<div class="pure-control-group">
|
||||
@@ -224,7 +224,7 @@ Math: {{ 1 + 1 }}") }}
|
||||
<span class="loader" >
|
||||
<span id="browsersteps-click-start">
|
||||
<h2 >Click here to Start</h2>
|
||||
<svg style="height: 3.5rem;" version="1.1" viewBox="0 0 32 32" xml:space="preserve" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"><g id="Layer_1"/><g id="play_x5F_alt"><path d="M16,0C7.164,0,0,7.164,0,16s7.164,16,16,16s16-7.164,16-16S24.836,0,16,0z M10,24V8l16.008,8L10,24z" style="fill: var(--color-grey-400);"/></g></svg><br>
|
||||
<svg style="height: 3.5rem;" version="1.1" viewBox="0 0 32 32" xml:space="preserve" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"><g id="start"/><g id="play_x5F_alt"><path d="M16,0C7.164,0,0,7.164,0,16s7.164,16,16,16s16-7.164,16-16S24.836,0,16,0z M10,24V8l16.008,8L10,24z" style="fill: var(--color-grey-400);"/></g></svg><br>
|
||||
Please allow 10-15 seconds for the browser to connect.<br>
|
||||
</span>
|
||||
<div class="spinner" style="display: none;"></div>
|
||||
@@ -234,21 +234,31 @@ Math: {{ 1 + 1 }}") }}
|
||||
</div>
|
||||
</div>
|
||||
<div id="browser-steps-fieldlist" >
|
||||
<span id="browser-seconds-remaining">Loading</span> <span style="font-size: 80%;"> (<a target=_new href="https://github.com/dgtlmoon/changedetection.io/pull/478/files#diff-1a79d924d1840c485238e66772391268a89c95b781d69091384cf1ea1ac146c9R4">?</a>) </span>
|
||||
<span id="browser-seconds-remaining">Loading</span> <span style="font-size: 80%;"> (<a target="newwindow" href="https://github.com/dgtlmoon/changedetection.io/pull/478/files#diff-1a79d924d1840c485238e66772391268a89c95b781d69091384cf1ea1ac146c9R4">?</a>) </span>
|
||||
{{ render_field(form.browser_steps) }}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</fieldset>
|
||||
{% else %}
|
||||
<span class="pure-form-message-inline">
|
||||
{% if not watch_uses_webdriver %}
|
||||
{{ only_webdriver_type_watches_warning() }}
|
||||
{% endif %}
|
||||
{% if not playwright_enabled %}
|
||||
{{ playwright_warning() }}
|
||||
{% endif %}
|
||||
</span>
|
||||
{% endif %}
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
|
||||
<div class="tab-pane-inner" id="notifications">
|
||||
<fieldset>
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_checkbox_field(form.notification_muted) }}
|
||||
</div>
|
||||
{% if is_html_webdriver %}
|
||||
{% if watch_uses_webdriver %}
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_checkbox_field(form.notification_screenshot) }}
|
||||
<span class="pure-form-message-inline">
|
||||
@@ -298,7 +308,7 @@ xpath://body/div/span[contains(@class, 'example-class')]",
|
||||
<span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the <element> contains <![CDATA[]]></strong></span><br>
|
||||
{% endif %}
|
||||
<span class="pure-form-message-inline">One CSS, xPath, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br>
|
||||
<p><div data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div><br></p>
|
||||
<span data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</span><br>
|
||||
<ul id="advanced-help-selectors" style="display: none;">
|
||||
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
|
||||
<li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
|
||||
@@ -471,7 +481,7 @@ keyword") }}
|
||||
|
||||
<fieldset>
|
||||
<div class="pure-control-group">
|
||||
{% if visualselector_enabled %}
|
||||
{% if playwright_enabled and watch_uses_webdriver %}
|
||||
<span class="pure-form-message-inline" id="visual-selector-heading">
|
||||
The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection. It automatically fills-in the filters in the "CSS/JSONPath/JQ/XPath Filters" box of the <a href="#filters-and-triggers">Filters & Triggers</a> tab. Use <strong>Shift+Click</strong> to select multiple items.
|
||||
</span>
|
||||
@@ -489,11 +499,12 @@ keyword") }}
|
||||
</div>
|
||||
<div id="selector-current-xpath" style="overflow-x: hidden"><strong>Currently:</strong> <span class="text">Loading...</span></div>
|
||||
{% else %}
|
||||
<span class="pure-form-message-inline">
|
||||
<p>Sorry, this functionality only works with Playwright/Chrome enabled watches.</p>
|
||||
<p>Enable the Playwright Chrome fetcher, or alternatively try our <a href="https://lemonade.changedetection.io/start">very affordable subscription based service</a>.</p>
|
||||
<p>This is because Selenium/WebDriver can not extract full page screenshots reliably.</p>
|
||||
</span>
|
||||
{% if not watch_uses_webdriver %}
|
||||
{{ only_webdriver_type_watches_warning() }}
|
||||
{% endif %}
|
||||
{% if not playwright_enabled %}
|
||||
{{ playwright_warning() }}
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
</div>
|
||||
</fieldset>
|
||||
|
||||
@@ -214,7 +214,7 @@ nav
|
||||
<a id="chrome-extension-link"
|
||||
title="Try our new Chrome Extension!"
|
||||
href="https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop">
|
||||
<img src="{{ url_for('static_content', group='images', filename='Google-Chrome-icon.png') }}" alt="Chrome">
|
||||
<img alt="Chrome store icon" src="{{ url_for('static_content', group='images', filename='Google-Chrome-icon.png') }}" alt="Chrome">
|
||||
Chrome Webstore
|
||||
</a>
|
||||
</p>
|
||||
@@ -280,9 +280,7 @@ nav
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<p>
|
||||
Your proxy provider may need to whitelist our IP of <code>204.15.192.195</code>
|
||||
</p>
|
||||
|
||||
<p><strong>Tip</strong>: "Residential" and "Mobile" proxy type can be more successfull than "Data Center" for blocked websites.
|
||||
|
||||
<div class="pure-control-group" id="extra-proxies-setting">
|
||||
|
||||
@@ -1 +1 @@
|
||||
<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 122.879 119.799" enable-background="new 0 0 122.879 119.799" xml:space="preserve"><g><path d="M49.988,0h0.016v0.007C63.803,0.011,76.298,5.608,85.34,14.652c9.027,9.031,14.619,21.515,14.628,35.303h0.007v0.033v0.04 h-0.007c-0.005,5.557-0.917,10.905-2.594,15.892c-0.281,0.837-0.575,1.641-0.877,2.409v0.007c-1.446,3.66-3.315,7.12-5.547,10.307 l29.082,26.139l0.018,0.016l0.157,0.146l0.011,0.011c1.642,1.563,2.536,3.656,2.649,5.78c0.11,2.1-0.543,4.248-1.979,5.971 l-0.011,0.016l-0.175,0.203l-0.035,0.035l-0.146,0.16l-0.016,0.021c-1.565,1.642-3.654,2.534-5.78,2.646 c-2.097,0.111-4.247-0.54-5.971-1.978l-0.015-0.011l-0.204-0.175l-0.029-0.024L78.761,90.865c-0.88,0.62-1.778,1.209-2.687,1.765 c-1.233,0.755-2.51,1.466-3.813,2.115c-6.699,3.342-14.269,5.222-22.272,5.222v0.007h-0.016v-0.007 c-13.799-0.004-26.296-5.601-35.338-14.645C5.605,76.291,0.016,63.805,0.007,50.021H0v-0.033v-0.016h0.007 c0.004-13.799,5.601-26.296,14.645-35.338C23.683,5.608,36.167,0.016,49.955,0.007V0H49.988L49.988,0z M50.004,11.21v0.007h-0.016 h-0.033V11.21c-10.686,0.007-20.372,4.35-27.384,11.359C15.56,29.578,11.213,39.274,11.21,49.973h0.007v0.016v0.033H11.21 c0.007,10.686,4.347,20.367,11.359,27.381c7.009,7.012,16.705,11.359,27.403,11.361v-0.007h0.016h0.033v0.007 c10.686-0.007,20.368-4.348,27.382-11.359c7.011-7.009,11.358-16.702,11.36-27.4h-0.006v-0.016v-0.033h0.006 c-0.006-10.686-4.35-20.372-11.358-27.384C70.396,15.56,60.703,11.213,50.004,11.21L50.004,11.21z"/></g></svg>
|
||||
<svg version="1.1" id="search" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 122.879 119.799" enable-background="new 0 0 122.879 119.799" xml:space="preserve"><g><path d="M49.988,0h0.016v0.007C63.803,0.011,76.298,5.608,85.34,14.652c9.027,9.031,14.619,21.515,14.628,35.303h0.007v0.033v0.04 h-0.007c-0.005,5.557-0.917,10.905-2.594,15.892c-0.281,0.837-0.575,1.641-0.877,2.409v0.007c-1.446,3.66-3.315,7.12-5.547,10.307 l29.082,26.139l0.018,0.016l0.157,0.146l0.011,0.011c1.642,1.563,2.536,3.656,2.649,5.78c0.11,2.1-0.543,4.248-1.979,5.971 l-0.011,0.016l-0.175,0.203l-0.035,0.035l-0.146,0.16l-0.016,0.021c-1.565,1.642-3.654,2.534-5.78,2.646 c-2.097,0.111-4.247-0.54-5.971-1.978l-0.015-0.011l-0.204-0.175l-0.029-0.024L78.761,90.865c-0.88,0.62-1.778,1.209-2.687,1.765 c-1.233,0.755-2.51,1.466-3.813,2.115c-6.699,3.342-14.269,5.222-22.272,5.222v0.007h-0.016v-0.007 c-13.799-0.004-26.296-5.601-35.338-14.645C5.605,76.291,0.016,63.805,0.007,50.021H0v-0.033v-0.016h0.007 c0.004-13.799,5.601-26.296,14.645-35.338C23.683,5.608,36.167,0.016,49.955,0.007V0H49.988L49.988,0z M50.004,11.21v0.007h-0.016 h-0.033V11.21c-10.686,0.007-20.372,4.35-27.384,11.359C15.56,29.578,11.213,39.274,11.21,49.973h0.007v0.016v0.033H11.21 c0.007,10.686,4.347,20.367,11.359,27.381c7.009,7.012,16.705,11.359,27.403,11.361v-0.007h0.016h0.033v0.007 c10.686-0.007,20.368-4.348,27.382-11.359c7.011-7.009,11.358-16.702,11.36-27.4h-0.006v-0.016v-0.033h0.006 c-0.006-10.686-4.35-20.372-11.358-27.384C70.396,15.56,60.703,11.213,50.004,11.21L50.004,11.21z"/></g></svg>
|
||||
|
Before Width: | Height: | Size: 1.5 KiB After Width: | Height: | Size: 1.5 KiB |
@@ -108,7 +108,8 @@
|
||||
{% else %}
|
||||
<a class="state-on" href="{{url_for('index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='play.svg')}}" alt="UnPause checks" title="UnPause checks" class="icon icon-unpause" ></a>
|
||||
{% endif %}
|
||||
<a class="link-mute state-{{'on' if watch.notification_muted else 'off'}}" href="{{url_for('index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notifications" title="Mute notifications" class="icon icon-mute" ></a>
|
||||
{% set mute_label = 'UnMute notification' if watch.notification_muted else 'Mute notification' %}
|
||||
<a class="link-mute state-{{'on' if watch.notification_muted else 'off'}}" href="{{url_for('index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="{{ mute_label }}" title="{{ mute_label }}" class="icon icon-mute" ></a>
|
||||
</td>
|
||||
<td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}}
|
||||
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"></a>
|
||||
@@ -118,7 +119,7 @@
|
||||
or ( watch.get_fetch_backend == "system" and system_default_fetcher == 'html_webdriver' )
|
||||
or "extra_browser_" in watch.get_fetch_backend
|
||||
%}
|
||||
<img class="status-icon" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" title="Using a Chrome browser" >
|
||||
<img class="status-icon" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" alt="Using a Chrome browser" title="Using a Chrome browser" >
|
||||
{% endif %}
|
||||
|
||||
{%if watch.is_pdf %}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" title="Converting PDF to text" >{% endif %}
|
||||
|
||||
@@ -34,7 +34,7 @@ def test_execute_custom_js(client, live_server, measure_memory_usage):
|
||||
assert b"unpaused" in res.data
|
||||
wait_for_all_checks(client)
|
||||
|
||||
uuid = extract_UUID_from_client(client)
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
assert live_server.app.config['DATASTORE'].data['watching'][uuid].history_n >= 1, "Watch history had atleast 1 (everything fetched OK)"
|
||||
|
||||
assert b"This text should be removed" not in res.data
|
||||
|
||||
@@ -48,7 +48,7 @@ def test_noproxy_option(client, live_server, measure_memory_usage):
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Watch added in Paused state, saving will unpause" in res.data
|
||||
uuid = extract_UUID_from_client(client)
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
res = client.get(
|
||||
url_for("edit_page", uuid=uuid, unpause_on_save=1))
|
||||
assert b'No proxy' in res.data
|
||||
|
||||
@@ -81,7 +81,7 @@ def test_socks5(client, live_server, measure_memory_usage):
|
||||
assert "Awesome, you made it".encode('utf-8') in res.data
|
||||
|
||||
# PROXY CHECKER WIDGET CHECK - this needs more checking
|
||||
uuid = extract_UUID_from_client(client)
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
|
||||
res = client.get(
|
||||
url_for("check_proxies.start_check", uuid=uuid),
|
||||
|
||||
@@ -99,7 +99,7 @@ def test_check_ldjson_price_autodetect(client, live_server, measure_memory_usage
|
||||
assert b'ldjson-price-track-offer' in res.data
|
||||
|
||||
# Accept it
|
||||
uuid = extract_UUID_from_client(client)
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
#time.sleep(1)
|
||||
client.get(url_for('price_data_follower.accept', uuid=uuid, follow_redirects=True))
|
||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
|
||||
@@ -68,7 +68,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
|
||||
|
||||
wait_for_all_checks(client)
|
||||
|
||||
uuid = extract_UUID_from_client(client)
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
|
||||
# Check the 'get latest snapshot works'
|
||||
res = client.get(url_for("watch_get_latest_html", uuid=uuid))
|
||||
|
||||
@@ -40,7 +40,7 @@ def test_check_encoding_detection(client, live_server, measure_memory_usage):
|
||||
|
||||
|
||||
# Content type recording worked
|
||||
uuid = extract_UUID_from_client(client)
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
assert live_server.app.config['DATASTORE'].data['watching'][uuid]['content-type'] == "text/html"
|
||||
|
||||
res = client.get(
|
||||
|
||||
@@ -51,7 +51,7 @@ def run_filter_test(client, live_server, content_filter):
|
||||
assert b"1 Imported" in res.data
|
||||
wait_for_all_checks(client)
|
||||
|
||||
uuid = extract_UUID_from_client(client)
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
|
||||
assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 0, "No filter = No filter failure"
|
||||
|
||||
|
||||
@@ -288,7 +288,7 @@ def test_clone_tag_on_import(client, live_server, measure_memory_usage):
|
||||
assert b'test-tag' in res.data
|
||||
assert b'another-tag' in res.data
|
||||
|
||||
watch_uuid = extract_UUID_from_client(client)
|
||||
watch_uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
res = client.get(url_for("form_clone", uuid=watch_uuid), follow_redirects=True)
|
||||
|
||||
assert b'Cloned' in res.data
|
||||
@@ -315,7 +315,7 @@ def test_clone_tag_on_quickwatchform_add(client, live_server, measure_memory_usa
|
||||
assert b'test-tag' in res.data
|
||||
assert b'another-tag' in res.data
|
||||
|
||||
watch_uuid = extract_UUID_from_client(client)
|
||||
watch_uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
res = client.get(url_for("form_clone", uuid=watch_uuid), follow_redirects=True)
|
||||
|
||||
assert b'Cloned' in res.data
|
||||
|
||||
@@ -36,7 +36,7 @@ def test_ignore(client, live_server, measure_memory_usage):
|
||||
|
||||
# Give the thread time to pick it up
|
||||
wait_for_all_checks(client)
|
||||
uuid = extract_UUID_from_client(client)
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
# use the highlighter endpoint
|
||||
res = client.post(
|
||||
url_for("highlight_submit_ignore_url", uuid=uuid),
|
||||
|
||||
@@ -514,3 +514,15 @@ def test_check_jq_ext_filter(client, live_server, measure_memory_usage):
|
||||
def test_check_jqraw_ext_filter(client, live_server, measure_memory_usage):
|
||||
if jq_support:
|
||||
check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server)
|
||||
|
||||
def test_jsonpath_BOM_utf8(client, live_server, measure_memory_usage):
|
||||
from .. import html_tools
|
||||
|
||||
# JSON string with BOM and correct double-quoted keys
|
||||
json_str = '\ufeff{"name": "José", "emoji": "😊", "language": "中文", "greeting": "Привет"}'
|
||||
|
||||
# See that we can find the second <script> one, which is not broken, and matches our filter
|
||||
text = html_tools.extract_json_as_string(json_str, "json:$.name")
|
||||
assert text == '"José"'
|
||||
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@ def test_content_filter_live_preview(client, live_server, measure_memory_usage):
|
||||
data={"url": test_url, "tags": ''},
|
||||
follow_redirects=True
|
||||
)
|
||||
uuid = extract_UUID_from_client(client)
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
res = client.post(
|
||||
url_for("edit_page", uuid=uuid),
|
||||
data={
|
||||
|
||||
@@ -6,7 +6,7 @@ from flask import url_for
|
||||
from loguru import logger
|
||||
|
||||
from .util import set_original_response, set_modified_response, set_more_modified_response, live_server_setup, wait_for_all_checks, \
|
||||
set_longer_modified_response
|
||||
set_longer_modified_response, get_index
|
||||
from . util import extract_UUID_from_client
|
||||
import logging
|
||||
import base64
|
||||
@@ -76,7 +76,7 @@ def test_check_notification(client, live_server, measure_memory_usage):
|
||||
testimage_png = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII='
|
||||
|
||||
|
||||
uuid = extract_UUID_from_client(client)
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
datastore = 'test-datastore'
|
||||
with open(os.path.join(datastore, str(uuid), 'last-screenshot.png'), 'wb') as f:
|
||||
f.write(base64.b64decode(testimage_png))
|
||||
@@ -328,7 +328,7 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_me
|
||||
|
||||
|
||||
# Check no errors were recorded, because we asked for 204 which is slightly uncommon but is still OK
|
||||
res = client.get(url_for("index"))
|
||||
res = get_index(client)
|
||||
assert b'notification-error' not in res.data
|
||||
|
||||
with open("test-datastore/notification.txt", 'r') as f:
|
||||
|
||||
@@ -373,13 +373,14 @@ def test_headers_textfile_in_request(client, live_server, measure_memory_usage):
|
||||
wait_for_all_checks(client)
|
||||
|
||||
with open('test-datastore/headers-testtag.txt', 'w') as f:
|
||||
f.write("tag-header: test")
|
||||
f.write("tag-header: test\r\nurl-header: http://example.com")
|
||||
|
||||
with open('test-datastore/headers.txt', 'w') as f:
|
||||
f.write("global-header: nice\r\nnext-global-header: nice")
|
||||
f.write("global-header: nice\r\nnext-global-header: nice\r\nurl-header-global: http://example.com/global")
|
||||
|
||||
with open('test-datastore/' + extract_UUID_from_client(client) + '/headers.txt', 'w') as f:
|
||||
f.write("watch-header: nice")
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
with open(f'test-datastore/{uuid}/headers.txt', 'w') as f:
|
||||
f.write("watch-header: nice\r\nurl-header-watch: http://example.com/watch")
|
||||
|
||||
wait_for_all_checks(client)
|
||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
@@ -410,6 +411,9 @@ def test_headers_textfile_in_request(client, live_server, measure_memory_usage):
|
||||
assert b"Xxx:ooo" in res.data
|
||||
assert b"Watch-Header:nice" in res.data
|
||||
assert b"Tag-Header:test" in res.data
|
||||
assert b"Url-Header:http://example.com" in res.data
|
||||
assert b"Url-Header-Global:http://example.com/global" in res.data
|
||||
assert b"Url-Header-Watch:http://example.com/watch" in res.data
|
||||
|
||||
# Check the custom UA from system settings page made it through
|
||||
if os.getenv('PLAYWRIGHT_DRIVER_URL'):
|
||||
|
||||
@@ -380,7 +380,7 @@ def test_change_with_notification_values(client, live_server):
|
||||
|
||||
## Now test the "SEND TEST NOTIFICATION" is working
|
||||
os.unlink("test-datastore/notification.txt")
|
||||
uuid = extract_UUID_from_client(client)
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
res = client.post(url_for("ajax_callback_send_notification_test", watch_uuid=uuid), data={}, follow_redirects=True)
|
||||
time.sleep(5)
|
||||
assert os.path.isfile("test-datastore/notification.txt"), "Notification received"
|
||||
|
||||
@@ -132,7 +132,7 @@ def test_rss_xpath_filtering(client, live_server, measure_memory_usage):
|
||||
)
|
||||
assert b"Watch added in Paused state, saving will unpause" in res.data
|
||||
|
||||
uuid = extract_UUID_from_client(client)
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
res = client.post(
|
||||
url_for("edit_page", uuid=uuid, unpause_on_save=1),
|
||||
data={
|
||||
|
||||
@@ -39,7 +39,7 @@ def test_check_basic_scheduler_functionality(client, live_server, measure_memory
|
||||
|
||||
assert b"1 Imported" in res.data
|
||||
wait_for_all_checks(client)
|
||||
uuid = extract_UUID_from_client(client)
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
|
||||
# Setup all the days of the weeks using XXX as the placeholder for monday/tuesday/etc
|
||||
|
||||
@@ -104,7 +104,7 @@ def test_check_basic_global_scheduler_functionality(client, live_server, measure
|
||||
|
||||
assert b"1 Imported" in res.data
|
||||
wait_for_all_checks(client)
|
||||
uuid = extract_UUID_from_client(client)
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
|
||||
# Setup all the days of the weeks using XXX as the placeholder for monday/tuesday/etc
|
||||
|
||||
|
||||
@@ -285,15 +285,43 @@ def live_server_setup(live_server):
|
||||
<p id="remove">This text should be removed</p>
|
||||
<form onsubmit="event.preventDefault();">
|
||||
<!-- obfuscated text so that we dont accidentally get a false positive due to conversion of the source :) --->
|
||||
<button name="test-button" onclick="getElementById('remove').remove();getElementById('some-content').innerHTML = atob('SSBzbWVsbCBKYXZhU2NyaXB0IGJlY2F1c2UgdGhlIGJ1dHRvbiB3YXMgcHJlc3NlZCE=')">Click here</button>
|
||||
<div id=some-content></div>
|
||||
<button name="test-button" onclick="
|
||||
getElementById('remove').remove();
|
||||
getElementById('some-content').innerHTML = atob('SSBzbWVsbCBKYXZhU2NyaXB0IGJlY2F1c2UgdGhlIGJ1dHRvbiB3YXMgcHJlc3NlZCE=');
|
||||
getElementById('reflect-text').innerHTML = getElementById('test-input-text').value;
|
||||
">Click here</button>
|
||||
|
||||
<div id="some-content"></div>
|
||||
|
||||
<pre>
|
||||
{header_text.lower()}
|
||||
</pre>
|
||||
</body>
|
||||
|
||||
<br>
|
||||
<!-- used for testing that the jinja2 compiled here --->
|
||||
<input type="text" value="" id="test-input-text" /><br>
|
||||
<div id="reflect-text">Waiting to reflect text from #test-input-text here</div>
|
||||
</form>
|
||||
|
||||
</body>
|
||||
</html>""", 200)
|
||||
resp.headers['Content-Type'] = 'text/html'
|
||||
return resp
|
||||
|
||||
live_server.start()
|
||||
|
||||
def get_index(client):
|
||||
import inspect
|
||||
# Get the caller's frame (parent function)
|
||||
frame = inspect.currentframe()
|
||||
caller_frame = frame.f_back # Go back to the caller's frame
|
||||
caller_name = caller_frame.f_code.co_name
|
||||
caller_line = caller_frame.f_lineno
|
||||
|
||||
print(f"Called by: {caller_name}, Line: {caller_line}")
|
||||
|
||||
res = client.get(url_for("index"))
|
||||
with open(f"test-datastore/index-{caller_name}-{caller_line}.html", 'wb') as f:
|
||||
f.write(res.data)
|
||||
|
||||
return res
|
||||
|
||||
@@ -2,14 +2,16 @@
|
||||
|
||||
import os
|
||||
from flask import url_for
|
||||
from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
|
||||
from ..util import live_server_setup, wait_for_all_checks, get_index
|
||||
|
||||
def test_setup(client, live_server, measure_memory_usage):
|
||||
def test_setup(client, live_server):
|
||||
live_server_setup(live_server)
|
||||
|
||||
|
||||
# Add a site in paused mode, add an invalid filter, we should still have visual selector data ready
|
||||
def test_visual_selector_content_ready(client, live_server, measure_memory_usage):
|
||||
live_server.stop()
|
||||
live_server.start()
|
||||
|
||||
import os
|
||||
import json
|
||||
@@ -27,7 +29,7 @@ def test_visual_selector_content_ready(client, live_server, measure_memory_usage
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Watch added in Paused state, saving will unpause" in res.data
|
||||
uuid = extract_UUID_from_client(client)
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
res = client.post(
|
||||
url_for("edit_page", uuid=uuid, unpause_on_save=1),
|
||||
data={
|
||||
@@ -87,7 +89,9 @@ def test_visual_selector_content_ready(client, live_server, measure_memory_usage
|
||||
|
||||
def test_basic_browserstep(client, live_server, measure_memory_usage):
|
||||
|
||||
#live_server_setup(live_server)
|
||||
live_server.stop()
|
||||
live_server.start()
|
||||
|
||||
assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test"
|
||||
|
||||
test_url = url_for('test_interactive_html_endpoint', _external=True)
|
||||
@@ -108,9 +112,13 @@ def test_basic_browserstep(client, live_server, measure_memory_usage):
|
||||
"url": test_url,
|
||||
"tags": "",
|
||||
'fetch_backend': "html_webdriver",
|
||||
'browser_steps-0-operation': 'Click element',
|
||||
'browser_steps-0-selector': 'button[name=test-button]',
|
||||
'browser_steps-0-optional_value': '',
|
||||
'browser_steps-0-operation': 'Enter text in field',
|
||||
'browser_steps-0-selector': '#test-input-text',
|
||||
# Should get set to the actual text (jinja2 rendered)
|
||||
'browser_steps-0-optional_value': "Hello-Jinja2-{% now 'Europe/Berlin', '%Y-%m-%d' %}",
|
||||
'browser_steps-1-operation': 'Click element',
|
||||
'browser_steps-1-selector': 'button[name=test-button]',
|
||||
'browser_steps-1-optional_value': '',
|
||||
# For now, cookies doesnt work in headers because it must be a full cookiejar object
|
||||
'headers': "testheader: yes\buser-agent: MyCustomAgent",
|
||||
},
|
||||
@@ -119,7 +127,7 @@ def test_basic_browserstep(client, live_server, measure_memory_usage):
|
||||
assert b"unpaused" in res.data
|
||||
wait_for_all_checks(client)
|
||||
|
||||
uuid = extract_UUID_from_client(client)
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
assert live_server.app.config['DATASTORE'].data['watching'][uuid].history_n >= 1, "Watch history had atleast 1 (everything fetched OK)"
|
||||
|
||||
assert b"This text should be removed" not in res.data
|
||||
@@ -132,13 +140,32 @@ def test_basic_browserstep(client, live_server, measure_memory_usage):
|
||||
assert b"This text should be removed" not in res.data
|
||||
assert b"I smell JavaScript because the button was pressed" in res.data
|
||||
|
||||
assert b'Hello-Jinja2-20' in res.data
|
||||
|
||||
assert b"testheader: yes" in res.data
|
||||
assert b"user-agent: mycustomagent" in res.data
|
||||
live_server.stop()
|
||||
|
||||
def test_non_200_errors_report_browsersteps(client, live_server):
|
||||
|
||||
live_server.stop()
|
||||
live_server.start()
|
||||
|
||||
four_o_four_url = url_for('test_endpoint', status_code=404, _external=True)
|
||||
four_o_four_url = four_o_four_url.replace('localhost.localdomain', 'cdio')
|
||||
four_o_four_url = four_o_four_url.replace('localhost', 'cdio')
|
||||
|
||||
res = client.post(
|
||||
url_for("form_quick_watch_add"),
|
||||
data={"url": four_o_four_url, "tags": '', 'edit_and_watch_submit_button': 'Edit > Watch'},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert b"Watch added in Paused state, saving will unpause" in res.data
|
||||
assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test"
|
||||
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
|
||||
# now test for 404 errors
|
||||
res = client.post(
|
||||
url_for("edit_page", uuid=uuid, unpause_on_save=1),
|
||||
@@ -153,12 +180,14 @@ def test_basic_browserstep(client, live_server, measure_memory_usage):
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"unpaused" in res.data
|
||||
|
||||
wait_for_all_checks(client)
|
||||
|
||||
res = client.get(url_for("index"))
|
||||
res = get_index(client)
|
||||
|
||||
assert b'Error - 404' in res.data
|
||||
|
||||
client.get(
|
||||
url_for("form_delete", uuid="all"),
|
||||
follow_redirects=True
|
||||
)
|
||||
)
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
version: '3.2'
|
||||
services:
|
||||
changedetection:
|
||||
image: ghcr.io/dgtlmoon/changedetection.io
|
||||
@@ -82,7 +81,7 @@ services:
|
||||
|
||||
|
||||
# Sockpuppetbrowser is basically chrome wrapped in an API for allowing fast fetching of web-pages.
|
||||
# RECOMMENDED FOR FETCHING PAGES WITH CHROME
|
||||
# RECOMMENDED FOR FETCHING PAGES WITH CHROME, be sure to enable the "PLAYWRIGHT_DRIVER_URL" env variable in the main changedetection container
|
||||
# sockpuppetbrowser:
|
||||
# hostname: sockpuppetbrowser
|
||||
# image: dgtlmoon/sockpuppetbrowser:latest
|
||||
|
||||
@@ -100,3 +100,6 @@ referencing==0.35.1
|
||||
|
||||
# Scheduler - Windows seemed to miss a lot of default timezone info (even "UTC" !)
|
||||
tzdata
|
||||
|
||||
pymongo>=4.3.3
|
||||
boto3>=1.26.0
|
||||
|
||||