mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2025-12-28 10:50:41 +00:00
Compare commits
8 Commits
visualsele
...
pip-securi
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d2daa6f28b | ||
|
|
963869b40a | ||
|
|
2f91695293 | ||
|
|
162a77079c | ||
|
|
01b81f4dbc | ||
|
|
eb2bd1ec8c | ||
|
|
32f490783a | ||
|
|
2819e05615 |
@@ -30,7 +30,7 @@ RUN pip install --target=/dependencies -r /requirements.txt
|
||||
# Playwright is an alternative to Selenium
|
||||
# Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing
|
||||
# https://github.com/dgtlmoon/changedetection.io/pull/1067 also musl/alpine (not supported)
|
||||
RUN pip install --target=/dependencies playwright~=1.39 \
|
||||
RUN pip install --target=/dependencies playwright~=1.27.1 \
|
||||
|| echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."
|
||||
|
||||
# Final image stage
|
||||
|
||||
@@ -232,13 +232,6 @@ See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configura
|
||||
|
||||
Raspberry Pi and linux/arm/v6 linux/arm/v7 arm64 devices are supported! See the wiki for [details](https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver)
|
||||
|
||||
## Import support
|
||||
|
||||
Easily [import your list of websites to watch for changes in Excel .xlsx file format](https://changedetection.io/tutorial/how-import-your-website-change-detection-lists-excel), or paste in lists of website URLs as plaintext.
|
||||
|
||||
Excel import is recommended - that way you can better organise tags/groups of websites and other features.
|
||||
|
||||
|
||||
## API Support
|
||||
|
||||
Supports managing the website watch list [via our API](https://changedetection.io/docs/api_v1/index.html)
|
||||
|
||||
@@ -1208,7 +1208,8 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
# These files should be in our subdirectory
|
||||
try:
|
||||
# set nocache, set content-type
|
||||
response = make_response(send_from_directory(os.path.join(datastore_o.datastore_path, filename), "elements.json"))
|
||||
watch_dir = datastore_o.datastore_path + "/" + filename
|
||||
response = make_response(send_from_directory(filename="elements.json", directory=watch_dir, path=watch_dir + "/elements.json"))
|
||||
response.headers['Content-type'] = 'application/json'
|
||||
response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
|
||||
response.headers['Pragma'] = 'no-cache'
|
||||
|
||||
@@ -137,7 +137,6 @@ class import_distill_io_json(Importer):
|
||||
|
||||
flash("{} Imported from Distill.io in {:.2f}s, {} Skipped.".format(len(self.new_uuids), time.time() - now, len(self.remaining_data)))
|
||||
|
||||
|
||||
class import_xlsx_wachete(Importer):
|
||||
|
||||
def run(self,
|
||||
@@ -145,7 +144,6 @@ class import_xlsx_wachete(Importer):
|
||||
flash,
|
||||
datastore,
|
||||
):
|
||||
|
||||
good = 0
|
||||
now = time.time()
|
||||
self.new_uuids = []
|
||||
@@ -155,69 +153,62 @@ class import_xlsx_wachete(Importer):
|
||||
try:
|
||||
wb = load_workbook(data)
|
||||
except Exception as e:
|
||||
# @todo correct except
|
||||
#@todo correct except
|
||||
flash("Unable to read export XLSX file, something wrong with the file?", 'error')
|
||||
return
|
||||
|
||||
row_id = 2
|
||||
for row in wb.active.iter_rows(min_row=row_id):
|
||||
try:
|
||||
extras = {}
|
||||
data = {}
|
||||
for cell in row:
|
||||
if not cell.value:
|
||||
continue
|
||||
column_title = wb.active.cell(row=1, column=cell.column).value.strip().lower()
|
||||
data[column_title] = cell.value
|
||||
sheet_obj = wb.active
|
||||
|
||||
# Forced switch to webdriver/playwright/etc
|
||||
dynamic_wachet = str(data.get('dynamic wachet', '')).strip().lower() # Convert bool to str to cover all cases
|
||||
# libreoffice and others can have it as =FALSE() =TRUE(), or bool(true)
|
||||
if 'true' in dynamic_wachet or dynamic_wachet == '1':
|
||||
extras['fetch_backend'] = 'html_webdriver'
|
||||
elif 'false' in dynamic_wachet or dynamic_wachet == '0':
|
||||
extras['fetch_backend'] = 'html_requests'
|
||||
i = 1
|
||||
row = 2
|
||||
while sheet_obj.cell(row=row, column=1).value:
|
||||
data = {}
|
||||
while sheet_obj.cell(row=row, column=i).value:
|
||||
column_title = sheet_obj.cell(row=1, column=i).value.strip().lower()
|
||||
column_row_value = sheet_obj.cell(row=row, column=i).value
|
||||
data[column_title] = column_row_value
|
||||
|
||||
if data.get('xpath'):
|
||||
# @todo split by || ?
|
||||
extras['include_filters'] = [data.get('xpath')]
|
||||
if data.get('name'):
|
||||
extras['title'] = data.get('name').strip()
|
||||
if data.get('interval (min)'):
|
||||
minutes = int(data.get('interval (min)'))
|
||||
hours, minutes = divmod(minutes, 60)
|
||||
days, hours = divmod(hours, 24)
|
||||
weeks, days = divmod(days, 7)
|
||||
extras['time_between_check'] = {'weeks': weeks, 'days': days, 'hours': hours, 'minutes': minutes, 'seconds': 0}
|
||||
i += 1
|
||||
|
||||
# At minimum a URL is required.
|
||||
if data.get('url'):
|
||||
try:
|
||||
validate_url(data.get('url'))
|
||||
except ValidationError as e:
|
||||
print(">> import URL error", data.get('url'), str(e))
|
||||
flash(f"Error processing row number {row_id}, URL value was incorrect, row was skipped.", 'error')
|
||||
# Don't bother processing anything else on this row
|
||||
continue
|
||||
extras = {}
|
||||
if data.get('xpath'):
|
||||
#@todo split by || ?
|
||||
extras['include_filters'] = [data.get('xpath')]
|
||||
if data.get('name'):
|
||||
extras['title'] = [data.get('name').strip()]
|
||||
if data.get('interval (min)'):
|
||||
minutes = int(data.get('interval (min)'))
|
||||
hours, minutes = divmod(minutes, 60)
|
||||
days, hours = divmod(hours, 24)
|
||||
weeks, days = divmod(days, 7)
|
||||
extras['time_between_check'] = {'weeks': weeks, 'days': days, 'hours': hours, 'minutes': minutes, 'seconds': 0}
|
||||
|
||||
|
||||
# At minimum a URL is required.
|
||||
if data.get('url'):
|
||||
try:
|
||||
validate_url(data.get('url'))
|
||||
except ValidationError as e:
|
||||
print(">> import URL error", data.get('url'), str(e))
|
||||
# Don't bother processing anything else on this row
|
||||
continue
|
||||
|
||||
new_uuid = datastore.add_watch(url=data['url'].strip(),
|
||||
extras=extras,
|
||||
tag=data.get('folder'),
|
||||
write_to_disk_now=False)
|
||||
if new_uuid:
|
||||
# Straight into the queue.
|
||||
self.new_uuids.append(new_uuid)
|
||||
good += 1
|
||||
|
||||
row += 1
|
||||
i = 1
|
||||
|
||||
new_uuid = datastore.add_watch(url=data['url'].strip(),
|
||||
extras=extras,
|
||||
tag=data.get('folder'),
|
||||
write_to_disk_now=False)
|
||||
if new_uuid:
|
||||
# Straight into the queue.
|
||||
self.new_uuids.append(new_uuid)
|
||||
good += 1
|
||||
except Exception as e:
|
||||
print(e)
|
||||
flash(f"Error processing row number {row_id}, check all cell data types are correct, row was skipped.", 'error')
|
||||
else:
|
||||
row_id += 1
|
||||
|
||||
flash(
|
||||
"{} imported from Wachete .xlsx in {:.2f}s".format(len(self.new_uuids), time.time() - now))
|
||||
|
||||
|
||||
class import_xlsx_custom(Importer):
|
||||
|
||||
def run(self,
|
||||
@@ -225,7 +216,6 @@ class import_xlsx_custom(Importer):
|
||||
flash,
|
||||
datastore,
|
||||
):
|
||||
|
||||
good = 0
|
||||
now = time.time()
|
||||
self.new_uuids = []
|
||||
@@ -235,68 +225,56 @@ class import_xlsx_custom(Importer):
|
||||
try:
|
||||
wb = load_workbook(data)
|
||||
except Exception as e:
|
||||
# @todo correct except
|
||||
#@todo correct except
|
||||
flash("Unable to read export XLSX file, something wrong with the file?", 'error')
|
||||
return
|
||||
|
||||
# @todo cehck atleast 2 rows, same in other method
|
||||
|
||||
sheet_obj = wb.active
|
||||
from .forms import validate_url
|
||||
row_i = 1
|
||||
row = 2
|
||||
while sheet_obj.cell(row=row, column=1).value:
|
||||
url = None
|
||||
tags = None
|
||||
extras = {}
|
||||
for col_i, cell_map in self.import_profile.items():
|
||||
cell_val = sheet_obj.cell(row=row, column=col_i).value
|
||||
if cell_map == 'url':
|
||||
url = cell_val.strip()
|
||||
try:
|
||||
validate_url(url)
|
||||
except ValidationError as e:
|
||||
print (">> Import URL error",url, str(e))
|
||||
# Don't bother processing anything else on this row
|
||||
url = None
|
||||
break
|
||||
|
||||
try:
|
||||
for row in wb.active.iter_rows():
|
||||
url = None
|
||||
tags = None
|
||||
extras = {}
|
||||
elif cell_map == 'tag':
|
||||
tags = cell_val.strip()
|
||||
elif cell_map == 'include_filters':
|
||||
# @todo validate?
|
||||
extras['include_filters'] = [cell_val.strip()]
|
||||
elif cell_map == 'interval_minutes':
|
||||
hours, minutes = divmod(int(cell_val), 60)
|
||||
days, hours = divmod(hours, 24)
|
||||
weeks, days = divmod(days, 7)
|
||||
extras['time_between_check'] = {'weeks': weeks, 'days': days, 'hours': hours, 'minutes': minutes, 'seconds': 0}
|
||||
else:
|
||||
extras[cell_map] = cell_val.strip()
|
||||
|
||||
for cell in row:
|
||||
if not self.import_profile.get(cell.col_idx):
|
||||
continue
|
||||
if not cell.value:
|
||||
continue
|
||||
# At minimum a URL is required.
|
||||
if url:
|
||||
new_uuid = datastore.add_watch(url=url,
|
||||
extras=extras,
|
||||
tag=tags,
|
||||
write_to_disk_now=False)
|
||||
if new_uuid:
|
||||
# Straight into the queue.
|
||||
self.new_uuids.append(new_uuid)
|
||||
good += 1
|
||||
|
||||
cell_map = self.import_profile.get(cell.col_idx)
|
||||
|
||||
cell_val = str(cell.value).strip() # could be bool
|
||||
|
||||
if cell_map == 'url':
|
||||
url = cell.value.strip()
|
||||
try:
|
||||
validate_url(url)
|
||||
except ValidationError as e:
|
||||
print(">> Import URL error", url, str(e))
|
||||
flash(f"Error processing row number {row_i}, URL value was incorrect, row was skipped.", 'error')
|
||||
# Don't bother processing anything else on this row
|
||||
url = None
|
||||
break
|
||||
elif cell_map == 'tag':
|
||||
tags = cell.value.strip()
|
||||
elif cell_map == 'include_filters':
|
||||
# @todo validate?
|
||||
extras['include_filters'] = [cell.value.strip()]
|
||||
elif cell_map == 'interval_minutes':
|
||||
hours, minutes = divmod(int(cell_val), 60)
|
||||
days, hours = divmod(hours, 24)
|
||||
weeks, days = divmod(days, 7)
|
||||
extras['time_between_check'] = {'weeks': weeks, 'days': days, 'hours': hours, 'minutes': minutes, 'seconds': 0}
|
||||
else:
|
||||
extras[cell_map] = cell_val
|
||||
|
||||
# At minimum a URL is required.
|
||||
if url:
|
||||
new_uuid = datastore.add_watch(url=url,
|
||||
extras=extras,
|
||||
tag=tags,
|
||||
write_to_disk_now=False)
|
||||
if new_uuid:
|
||||
# Straight into the queue.
|
||||
self.new_uuids.append(new_uuid)
|
||||
good += 1
|
||||
except Exception as e:
|
||||
print(e)
|
||||
flash(f"Error processing row number {row_i}, check all cell data types are correct, row was skipped.", 'error')
|
||||
else:
|
||||
row_i += 1
|
||||
row += 1
|
||||
|
||||
flash(
|
||||
"{} imported from custom .xlsx in {:.2f}s".format(len(self.new_uuids), time.time() - now))
|
||||
|
||||
Binary file not shown.
@@ -127,7 +127,6 @@ def test_import_custom_xlsx(client, live_server):
|
||||
"""Test can upload a excel spreadsheet and the watches are created correctly"""
|
||||
|
||||
#live_server_setup(live_server)
|
||||
|
||||
dirname = os.path.dirname(__file__)
|
||||
filename = os.path.join(dirname, 'import/spreadsheet.xlsx')
|
||||
with open(filename, 'rb') as f:
|
||||
@@ -151,14 +150,13 @@ def test_import_custom_xlsx(client, live_server):
|
||||
follow_redirects=True,
|
||||
)
|
||||
|
||||
assert b'4 imported from custom .xlsx' in res.data
|
||||
# Because this row was actually just a header with no usable URL, we should get an error
|
||||
assert b'Error processing row number 1' in res.data
|
||||
assert b'2 imported from custom .xlsx' in res.data
|
||||
|
||||
res = client.get(
|
||||
url_for("index")
|
||||
)
|
||||
|
||||
|
||||
assert b'Somesite results ABC' in res.data
|
||||
assert b'City news results' in res.data
|
||||
|
||||
@@ -169,9 +167,6 @@ def test_import_custom_xlsx(client, live_server):
|
||||
assert filters[0] == '/html[1]/body[1]/div[4]/div[1]/div[1]/div[1]||//*[@id=\'content\']/div[3]/div[1]/div[1]||//*[@id=\'content\']/div[1]'
|
||||
assert watch.get('time_between_check') == {'weeks': 0, 'days': 1, 'hours': 6, 'minutes': 24, 'seconds': 0}
|
||||
|
||||
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||
assert b'Deleted' in res.data
|
||||
|
||||
def test_import_watchete_xlsx(client, live_server):
|
||||
"""Test can upload a excel spreadsheet and the watches are created correctly"""
|
||||
|
||||
@@ -191,7 +186,7 @@ def test_import_watchete_xlsx(client, live_server):
|
||||
follow_redirects=True,
|
||||
)
|
||||
|
||||
assert b'4 imported from Wachete .xlsx' in res.data
|
||||
assert b'2 imported from Wachete .xlsx' in res.data
|
||||
|
||||
res = client.get(
|
||||
url_for("index")
|
||||
@@ -206,13 +201,3 @@ def test_import_watchete_xlsx(client, live_server):
|
||||
filters = watch.get('include_filters')
|
||||
assert filters[0] == '/html[1]/body[1]/div[4]/div[1]/div[1]/div[1]||//*[@id=\'content\']/div[3]/div[1]/div[1]||//*[@id=\'content\']/div[1]'
|
||||
assert watch.get('time_between_check') == {'weeks': 0, 'days': 1, 'hours': 6, 'minutes': 24, 'seconds': 0}
|
||||
assert watch.get('fetch_backend') == 'html_requests' # Has inactive 'dynamic wachet'
|
||||
|
||||
if watch.get('title') == 'JS website':
|
||||
assert watch.get('fetch_backend') == 'html_webdriver' # Has active 'dynamic wachet'
|
||||
|
||||
if watch.get('title') == 'system default website':
|
||||
assert watch.get('fetch_backend') == 'system' # uses default if blank
|
||||
|
||||
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||
assert b'Deleted' in res.data
|
||||
|
||||
@@ -54,13 +54,6 @@ def test_visual_selector_content_ready(client, live_server):
|
||||
with open(os.path.join('test-datastore', uuid, 'elements.json'), 'r') as f:
|
||||
json.load(f)
|
||||
|
||||
# Attempt to fetch it via the web hook that the browser would use
|
||||
res = client.get(url_for('static_content', group='visual_selector_data', filename=uuid))
|
||||
json.loads(res.data)
|
||||
assert res.mimetype == 'application/json'
|
||||
assert res.status_code == 200
|
||||
|
||||
|
||||
# Some options should be enabled
|
||||
# @todo - in the future, the visibility should be toggled by JS from the request type setting
|
||||
res = client.get(
|
||||
|
||||
@@ -66,12 +66,25 @@ services:
|
||||
# browser-chrome:
|
||||
# condition: service_started
|
||||
|
||||
# browser-chrome:
|
||||
# hostname: browser-chrome
|
||||
# image: selenium/standalone-chrome:4
|
||||
# environment:
|
||||
# - VNC_NO_PASSWORD=1
|
||||
# - SCREEN_WIDTH=1920
|
||||
# - SCREEN_HEIGHT=1080
|
||||
# - SCREEN_DEPTH=24
|
||||
# volumes:
|
||||
# # Workaround to avoid the browser crashing inside a docker container
|
||||
# # See https://github.com/SeleniumHQ/docker-selenium#quick-start
|
||||
# - /dev/shm:/dev/shm
|
||||
# restart: unless-stopped
|
||||
|
||||
# Used for fetching pages via Playwright+Chrome where you need Javascript support.
|
||||
# Note: Playwright/browserless not supported on ARM type devices (rPi etc)
|
||||
# RECOMMENDED FOR FETCHING PAGES WITH CHROME
|
||||
# playwright-chrome:
|
||||
# hostname: playwright-chrome
|
||||
# image: browserless/chrome:1.60-chrome-stable
|
||||
# image: browserless/chrome
|
||||
# restart: unless-stopped
|
||||
# environment:
|
||||
# - SCREEN_WIDTH=1920
|
||||
@@ -88,23 +101,6 @@ services:
|
||||
# Ignore HTTPS errors, like for self-signed certs
|
||||
# - DEFAULT_IGNORE_HTTPS_ERRORS=true
|
||||
#
|
||||
|
||||
# Used for fetching pages via Selenium WebDriver+Chrome where you need Javascript support.
|
||||
# Note: works well but is deprecated; it doesn't fetch full-page screenshots and has other issues
|
||||
# browser-chrome:
|
||||
# hostname: browser-chrome
|
||||
# image: selenium/standalone-chrome:4
|
||||
# environment:
|
||||
# - VNC_NO_PASSWORD=1
|
||||
# - SCREEN_WIDTH=1920
|
||||
# - SCREEN_HEIGHT=1080
|
||||
# - SCREEN_DEPTH=24
|
||||
# volumes:
|
||||
# # Workaround to avoid the browser crashing inside a docker container
|
||||
# # See https://github.com/SeleniumHQ/docker-selenium#quick-start
|
||||
# - /dev/shm:/dev/shm
|
||||
# restart: unless-stopped
|
||||
|
||||
volumes:
|
||||
changedetection-data:
|
||||
|
||||
|
||||
Reference in New Issue
Block a user