Compare commits

...

20 Commits

Author SHA1 Message Date
dgtlmoon
699e4a01f0 fix syntax 2023-01-08 14:23:21 +01:00
dgtlmoon
1b2507890d Also test that the proxy list JSON, if it exists - on startup - doesnt throw a parse error 2023-01-08 14:17:05 +01:00
dgtlmoon
6619e62972 Dont recreate DB if its corrupt, exit with error cleanly 2023-01-08 13:56:25 +01:00
jtagcat
58c7cbeac7 UI: Updating queued success message (#1285) 2023-01-05 21:12:02 +01:00
Abhishek Malani
ab9efdfd14 README.md - Fix release link (#1277) 2022-12-29 11:06:51 +01:00
Hmmbob
65d5a5d34c Notifications: updating apprise (slack notification fixes and others) (#1272) 2022-12-28 18:34:55 +01:00
dgtlmoon
93c157ee7f Remove docker-compose version so it works on any modern version #1144 (#1268) 2022-12-26 20:37:31 +01:00
Bill Metangmo
de85db887c Update the docker compose file to any version (#1079) (#1144) 2022-12-26 20:36:42 +01:00
dgtlmoon
50805ca38a IPv6 support for listening on (#1267) 2022-12-26 20:36:16 +01:00
dgtlmoon
fc6424c39e Test improvements (#1264) 2022-12-26 14:17:40 +01:00
dgtlmoon
f0966eb23a 0.40.0.4 2022-12-25 18:25:45 +01:00
dgtlmoon
e4fb5ab4da UI - Suggest adding proxy for watch when 403 access denied is reached (#1260) 2022-12-23 22:26:24 +01:00
dgtlmoon
e99f07a51d Filters & Notifications - fixed tokens in filter not found notification 2022-12-22 10:05:17 +01:00
dgtlmoon
08ee223b5f UI - Fix broken html tags in settings page 2022-12-20 18:57:26 +01:00
dgtlmoon
572f9b8a31 Proxy Settings in UI - TidyUp BrightData text 2022-12-20 10:08:16 +01:00
dgtlmoon
fcfd1b5e10 Ability to configure extra proxies via the UI (#1235) 2022-12-19 21:48:01 +01:00
dgtlmoon
0790dd555e Docker container updates - use Python 3.10, remove unused packages 2022-12-19 20:46:02 +01:00
dgtlmoon
0b20dc7712 Tidy up list icons a bit (#1250) 2022-12-19 20:30:32 +01:00
dgtlmoon
13c4121f52 PDF File change detection - Initial PDF fetcher support with basic text extraction (#1244) 2022-12-19 17:51:41 +01:00
dgtlmoon
e8e176f3bd Testing - Run test as fully built docker container (#1245) 2022-12-19 14:41:34 +01:00
41 changed files with 701 additions and 363 deletions

View File

@@ -50,7 +50,6 @@ jobs:
python -m pip install --upgrade pip
pip install flake8 pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi
- name: Create release metadata
run: |

View File

@@ -19,12 +19,6 @@ jobs:
with:
python-version: 3.9
# - name: Install dependencies
# run: |
# python -m pip install --upgrade pip
# pip install flake8 pytest
# if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
# if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi
- name: Test that pip builds without error
run: |

View File

@@ -8,32 +8,70 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.9
# Mainly just for link/flake8
- name: Set up Python 3.10
uses: actions/setup-python@v2
with:
python-version: 3.9
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install flake8 pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi
python-version: '3.10'
- name: Lint with flake8
run: |
pip3 install flake8
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Unit tests
- name: Spin up ancillary testable services
run: |
python3 -m unittest changedetectionio.tests.unit.test_notification_diff
docker network create changedet-network
- name: Test with pytest
# Selenium+browserless
docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome-debug:3.141.59
docker run --network changedet-network -d --hostname browserless -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm -p 3000:3000 --shm-size="2g" browserless/chrome:1.53-chrome-stable
- name: Build changedetection.io container for testing
run: |
# Build a changedetection.io container and start testing inside
docker build . -t test-changedetectionio
- name: Test built container with pytest
run: |
# Each test is totally isolated and performs its own cleanup/reset
cd changedetectionio; ./run_all_tests.sh
# Unit tests
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff'
# All tests
docker run --network changedet-network test-changedetectionio bash -c 'cd changedetectionio && ./run_basic_tests.sh'
- name: Test built container selenium+browserless/playwright
run: |
# Selenium fetch
docker run -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py'
# Playwright/Browserless fetch
docker run -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py && pytest tests/visualselector/test_fetch_data.py'
- name: Test proxy interaction
run: |
cd changedetectionio
./run_proxy_tests.sh
cd ..
- name: Test changedetection.io container starts+runs basically without error
run: |
docker run -p 5556:5000 -d test-changedetectionio
sleep 3
# Should return 0 (no error) when grep finds it
curl -s http://localhost:5556 |grep -q checkbox-uuid
curl -s http://localhost:5556/rss|grep -q rss-specification
# and IPv6
curl -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid
curl -s -g -6 "http://[::1]:5556/rss"|grep -q rss-specification
#export WEBDRIVER_URL=http://localhost:4444/wd/hub
#pytest tests/fetchers/test_content.py
#pytest tests/test_errorhandling.py

View File

@@ -7,9 +7,3 @@ Otherwise, it's always best to PR into the `dev` branch.
Please be sure that all new functionality has a matching test!
Use `pytest` to validate/test, you can run the existing tests as `pytest tests/test_notification.py` for example
```
pip3 install -r requirements-dev
```
this is from https://github.com/dgtlmoon/changedetection.io/blob/master/requirements-dev.txt

View File

@@ -1,5 +1,5 @@
# pip dependencies install stage
FROM python:3.8-slim as builder
FROM python:3.10-slim as builder
# See `cryptography` pin comment in requirements.txt
ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1
@@ -29,21 +29,16 @@ RUN pip install --target=/dependencies playwright~=1.27.1 \
|| echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."
# Final image stage
FROM python:3.8-slim
FROM python:3.10-slim
# See `cryptography` pin comment in requirements.txt
ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1
# Re #93, #73, excluding rustc (adds another 430Mb~)
RUN apt-get update && apt-get install -y --no-install-recommends \
g++ \
gcc \
libc-dev \
libffi-dev \
libjpeg-dev \
libssl-dev \
libxslt-dev \
zlib1g-dev
libssl1.1 \
libxslt1.1 \
# For pdftohtml
poppler-utils \
zlib1g \
&& apt-get clean && rm -rf /var/lib/apt/lists/*
# https://stackoverflow.com/questions/58701233/docker-logs-erroneously-appears-empty-until-container-stops
ENV PYTHONUNBUFFERED=1

View File

@@ -43,6 +43,7 @@ Requires Playwright to be enabled.
- Products and services have a change in pricing
- _Out of stock notification_ and _Back In stock notification_
- Monitor and track PDF file changes, know when a PDF file has text changes.
- Governmental department updates (changes are often only on their websites)
- New software releases, security advisories when you're not on their mailing list.
- Festivals with changes
@@ -68,6 +69,7 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W
- Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions!
- Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JSONPath or jq
- Switch between fast non-JS and Chrome JS based "fetchers"
- Track changes in PDF files (Monitor text changed in the PDF, Also monitor PDF filesize and checksums)
- Easily specify how often a site should be checked
- Execute JS before extracting text (Good for logging in, see examples in the UI!)
- Override Request Headers, Specify `POST` or `GET` and other methods
@@ -243,5 +245,5 @@ I offer commercial support, this software is depended on by network security, ae
[test-shield]: https://github.com/dgtlmoon/changedetection.io/actions/workflows/test-only.yml/badge.svg?branch=master
[license-shield]: https://img.shields.io/github/license/dgtlmoon/changedetection.io.svg?style=for-the-badge
[release-link]: https://github.com/dgtlmoon.com/changedetection.io/releases
[release-link]: https://github.com/dgtlmoon/changedetection.io/releases
[docker-link]: https://hub.docker.com/r/dgtlmoon/changedetection.io

View File

@@ -7,7 +7,7 @@
from changedetectionio import changedetection
import multiprocessing
import signal
import sys
import os
def sigchld_handler(_signo, _stack_frame):
@@ -35,6 +35,9 @@ if __name__ == '__main__':
try:
while True:
time.sleep(1)
if not parse_process.is_alive():
# Process died/crashed for some reason, exit with error set
sys.exit(1)
except KeyboardInterrupt:
#parse_process.terminate() not needed, because this process will issue it to the sub-process anyway

View File

@@ -36,7 +36,7 @@ from flask_wtf import CSRFProtect
from changedetectionio import html_tools
from changedetectionio.api import api_v1
__version__ = '0.40.0.3'
__version__ = '0.40.0.4'
datastore = None
@@ -406,17 +406,20 @@ def changedetection_app(config=None, datastore_o=None):
existing_tags = datastore.get_all_tags()
form = forms.quickWatchForm(request.form)
output = render_template("watch-overview.html",
form=form,
watches=sorted_watches,
tags=existing_tags,
output = render_template(
"watch-overview.html",
# Don't link to hosting when we're on the hosting environment
active_tag=limit_tag,
app_rss_token=datastore.data['settings']['application']['rss_access_token'],
has_unviewed=datastore.has_unviewed,
# Don't link to hosting when we're on the hosting environment
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
form=form,
guid=datastore.data['app_guid'],
queued_uuids=[q_uuid.item['uuid'] for q_uuid in update_q.queue])
has_proxies=datastore.proxy_list,
has_unviewed=datastore.has_unviewed,
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
queued_uuids=[q_uuid.item['uuid'] for q_uuid in update_q.queue],
tags=existing_tags,
watches=sorted_watches
)
if session.get('share-link'):
@@ -1215,7 +1218,7 @@ def changedetection_app(config=None, datastore_o=None):
if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False}))
i += 1
flash("{} watches are queued for rechecking.".format(i))
flash("{} watches queued for rechecking.".format(i))
return redirect(url_for('index', tag=tag))
@app.route("/form/checkbox-operations", methods=['POST'])
@@ -1236,7 +1239,6 @@ def changedetection_app(config=None, datastore_o=None):
uuid = uuid.strip()
if datastore.data['watching'].get(uuid):
datastore.data['watching'][uuid.strip()]['paused'] = True
flash("{} watches paused".format(len(uuids)))
elif (op == 'unpause'):
@@ -1266,8 +1268,8 @@ def changedetection_app(config=None, datastore_o=None):
if datastore.data['watching'].get(uuid):
# Recheck and require a full reprocessing
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
flash("{} watches queued for rechecking".format(len(uuids)))
flash("{} watches un-muted".format(len(uuids)))
elif (op == 'notification-default'):
from changedetectionio.notification import (
default_notification_format_for_watch

View File

@@ -3,11 +3,14 @@
# Launch as a eventlet.wsgi server instance.
from distutils.util import strtobool
from json.decoder import JSONDecodeError
import eventlet
import eventlet.wsgi
import getopt
import os
import signal
import socket
import sys
from . import store, changedetection_app, content_fetcher
@@ -83,8 +86,14 @@ def main():
"Or use the -C parameter to create the directory.".format(app_config['datastore_path']), file=sys.stderr)
sys.exit(2)
try:
datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], version_tag=__version__)
except JSONDecodeError as e:
# Dont' start if the JSON DB looks corrupt
print ("ERROR: JSON DB or Proxy List JSON at '{}' appears to be corrupt, aborting".format(app_config['datastore_path']))
print(str(e))
return
datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], version_tag=__version__)
app = changedetection_app(app_config, datastore)
signal.signal(signal.SIGTERM, sigterm_handler)
@@ -126,11 +135,11 @@ def main():
if ssl_mode:
# @todo finalise SSL config, but this should get you in the right direction if you need it.
eventlet.wsgi.server(eventlet.wrap_ssl(eventlet.listen((host, port)),
eventlet.wsgi.server(eventlet.wrap_ssl(eventlet.listen((host, port), socket.AF_INET6),
certfile='cert.pem',
keyfile='privkey.pem',
server_side=True), app)
else:
eventlet.wsgi.server(eventlet.listen((host, int(port))), app)
eventlet.wsgi.server(eventlet.listen((host, int(port)), socket.AF_INET6), app)

View File

@@ -1,3 +1,4 @@
import hashlib
from abc import abstractmethod
import chardet
import json
@@ -116,7 +117,8 @@ class Fetcher():
request_body,
request_method,
ignore_status_codes=False,
current_include_filters=None):
current_include_filters=None,
is_binary=False):
# Should set self.error, self.status_code and self.content
pass
@@ -241,6 +243,14 @@ class base_html_playwright(Fetcher):
if proxy_override:
self.proxy = {'server': proxy_override}
if self.proxy:
# Playwright needs separate username and password values
from urllib.parse import urlparse
parsed = urlparse(self.proxy.get('server'))
if parsed.username:
self.proxy['username'] = parsed.username
self.proxy['password'] = parsed.password
def screenshot_step(self, step_n=''):
# There's a bug where we need to do it twice or it doesnt take the whole page, dont know why.
@@ -267,7 +277,8 @@ class base_html_playwright(Fetcher):
request_body,
request_method,
ignore_status_codes=False,
current_include_filters=None):
current_include_filters=None,
is_binary=False):
from playwright.sync_api import sync_playwright
import playwright._impl._api_types
@@ -367,7 +378,7 @@ class base_html_playwright(Fetcher):
context.close()
browser.close()
print ("Content Fetcher > Content was empty")
raise EmptyReply(url=url, status_code=None)
raise EmptyReply(url=url, status_code=response.status)
# Bug 2(?) Set the viewport size AFTER loading the page
self.page.set_viewport_size({"width": 1280, "height": 1024})
@@ -453,7 +464,8 @@ class base_html_webdriver(Fetcher):
request_body,
request_method,
ignore_status_codes=False,
current_include_filters=None):
current_include_filters=None,
is_binary=False):
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
@@ -528,7 +540,8 @@ class html_requests(Fetcher):
request_body,
request_method,
ignore_status_codes=False,
current_include_filters=None):
current_include_filters=None,
is_binary=False):
# Make requests use a more modern looking user-agent
if not 'User-Agent' in request_headers:
@@ -558,10 +571,12 @@ class html_requests(Fetcher):
# For example - some sites don't tell us it's utf-8, but return utf-8 content
# This seems to not occur when using webdriver/selenium, it seems to detect the text encoding more reliably.
# https://github.com/psf/requests/issues/1604 good info about requests encoding detection
if not r.headers.get('content-type') or not 'charset=' in r.headers.get('content-type'):
encoding = chardet.detect(r.content)['encoding']
if encoding:
r.encoding = encoding
if not is_binary:
# Don't run this for PDF (and requests identified as binary) takes a _long_ time
if not r.headers.get('content-type') or not 'charset=' in r.headers.get('content-type'):
encoding = chardet.detect(r.content)['encoding']
if encoding:
r.encoding = encoding
if not r.content or not len(r.content):
raise EmptyReply(url=url, status_code=r.status_code)
@@ -573,8 +588,14 @@ class html_requests(Fetcher):
raise Non200ErrorCodeReceived(url=url, status_code=r.status_code, page_html=r.text)
self.status_code = r.status_code
self.content = r.text
if is_binary:
# Binary files just return their checksum until we add something smarter
self.content = hashlib.md5(r.content).hexdigest()
else:
self.content = r.text
self.headers = r.headers
self.raw_content = r.content
# Decide which is the 'real' HTML webdriver, this is more a system wide config

View File

@@ -1,14 +0,0 @@
FROM python:3.8-slim
# https://stackoverflow.com/questions/58701233/docker-logs-erroneously-appears-empty-until-container-stops
ENV PYTHONUNBUFFERED=1
WORKDIR /app
RUN [ ! -d "/datastore" ] && mkdir /datastore
COPY sleep.py /
CMD [ "python", "/sleep.py" ]

View File

@@ -1,7 +0,0 @@
import time
print ("Sleep loop, you should run your script from the console")
while True:
# Wait for 5 seconds
time.sleep(2)

View File

@@ -16,6 +16,10 @@ class FilterNotFoundInResponse(ValueError):
def __init__(self, msg):
ValueError.__init__(self, msg)
class PDFToHTMLToolNotFound(ValueError):
def __init__(self, msg):
ValueError.__init__(self, msg)
# Some common stuff here that can be moved to a base class
# (set_proxy_from_list)
@@ -87,7 +91,7 @@ class perform_site_check():
is_source = True
# Pluggable content fetcher
prefer_backend = watch.get('fetch_backend')
prefer_backend = watch.get_fetch_backend
if hasattr(content_fetcher, prefer_backend):
klass = getattr(content_fetcher, prefer_backend)
else:
@@ -117,12 +121,18 @@ class perform_site_check():
if watch.get('webdriver_js_execute_code') is not None and watch.get('webdriver_js_execute_code').strip():
fetcher.webdriver_js_execute_code = watch.get('webdriver_js_execute_code')
fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch.get('include_filters'))
# requests for PDF's, images etc should be passwd the is_binary flag
is_binary = watch.is_pdf
fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch.get('include_filters'), is_binary=is_binary)
fetcher.quit()
self.screenshot = fetcher.screenshot
self.xpath_data = fetcher.xpath_data
# Track the content type
update_obj['content_type'] = fetcher.headers.get('Content-Type', '')
# Watches added automatically in the queue manager will skip if its the same checksum as the previous run
# Saves a lot of CPU
update_obj['previous_md5_before_filters'] = hashlib.md5(fetcher.content.encode('utf-8')).hexdigest()
@@ -149,6 +159,31 @@ class perform_site_check():
is_html = False
is_json = False
if watch.is_pdf or 'application/pdf' in fetcher.headers.get('Content-Type', '').lower():
from shutil import which
tool = os.getenv("PDF_TO_HTML_TOOL", "pdftohtml")
if not which(tool):
raise PDFToHTMLToolNotFound("Command-line `{}` tool was not found in system PATH, was it installed?".format(tool))
import subprocess
proc = subprocess.Popen(
[tool, '-stdout', '-', '-s', 'out.pdf', '-i'],
stdout=subprocess.PIPE,
stdin=subprocess.PIPE)
proc.stdin.write(fetcher.raw_content)
proc.stdin.close()
fetcher.content = proc.stdout.read().decode('utf-8')
proc.wait(timeout=60)
# Add a little metadata so we know if the file changes (like if an image changes, but the text is the same
# @todo may cause problems with non-UTF8?
metadata = "<p>Added by changedetection.io: Document checksum - {} Filesize - {} bytes</p>".format(
hashlib.md5(fetcher.raw_content).hexdigest().upper(),
len(fetcher.content))
fetcher.content = fetcher.content.replace('</body>', metadata + '</body>')
include_filters_rule = deepcopy(watch.get('include_filters', []))
# include_filters_rule = watch['include_filters']
subtractive_selectors = watch.get(

View File

@@ -426,6 +426,13 @@ class watchForm(commonSettingsForm):
return result
class SingleExtraProxy(Form):
# maybe better to set some <script>var..
proxy_name = StringField('Name', [validators.Optional()], render_kw={"placeholder": "Name"})
proxy_url = StringField('Proxy URL', [validators.Optional()], render_kw={"placeholder": "http://user:pass@...:3128", "size":50})
# @todo do the validation here instead
# datastore.data['settings']['requests']..
class globalSettingsRequestForm(Form):
time_between_check = FormField(TimeBetweenCheckForm)
@@ -433,6 +440,15 @@ class globalSettingsRequestForm(Form):
jitter_seconds = IntegerField('Random jitter seconds ± check',
render_kw={"style": "width: 5em;"},
validators=[validators.NumberRange(min=0, message="Should contain zero or more seconds")])
extra_proxies = FieldList(FormField(SingleExtraProxy), min_entries=5)
def validate_extra_proxies(self, extra_validators=None):
for e in self.data['extra_proxies']:
if e.get('proxy_name') or e.get('proxy_url'):
if not e.get('proxy_name','').strip() or not e.get('proxy_url','').strip():
self.extra_proxies.errors.append('Both a name, and a Proxy URL is required.')
return False
# datastore.data['settings']['application']..
class globalSettingsApplicationForm(commonSettingsForm):

View File

@@ -15,11 +15,12 @@ class model(dict):
'headers': {
},
'requests': {
'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")), # Default 45 seconds
'time_between_check': {'weeks': None, 'days': None, 'hours': 3, 'minutes': None, 'seconds': None},
'extra_proxies': [], # Configurable extra proxies via the UI
'jitter_seconds': 0,
'proxy': None, # Preferred proxy connection
'time_between_check': {'weeks': None, 'days': None, 'hours': 3, 'minutes': None, 'seconds': None},
'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")), # Default 45 seconds
'workers': int(getenv("DEFAULT_SETTINGS_REQUESTS_WORKERS", "10")), # Number of threads, lower is better for slow connections
'proxy': None # Preferred proxy connection
},
'application': {
'api_access_token_enabled': True,

View File

@@ -114,6 +114,24 @@ class model(dict):
return ready_url
@property
def get_fetch_backend(self):
"""
Like just using the `fetch_backend` key but there could be some logic
:return:
"""
# Maybe also if is_image etc?
# This is because chrome/playwright wont render the PDF in the browser and we will just fetch it and use pdf2html to see the text.
if self.is_pdf:
return 'html_requests'
return self.get('fetch_backend')
@property
def is_pdf(self):
# content_type field is set in the future
return '.pdf' in self.get('url', '').lower() or 'pdf' in self.get('content_type', '').lower()
@property
def label(self):
# Used for sorting

View File

@@ -1,104 +0,0 @@
#!/bin/bash
# live_server will throw errors even with live_server_scope=function if I have the live_server setup in different functions
# and I like to restart the server for each test (and have the test cleanup after each test)
# merge request welcome :)
# exit when any command fails
set -e
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
find tests/test_*py -type f|while read test_name
do
echo "TEST RUNNING $test_name"
pytest $test_name
done
echo "RUNNING WITH BASE_URL SET"
# Now re-run some tests with BASE_URL enabled
# Re #65 - Ability to include a link back to the installation, in the notification.
export BASE_URL="https://really-unique-domain.io"
pytest tests/test_notification.py
# Re-run with HIDE_REFERER set - could affect login
export HIDE_REFERER=True
pytest tests/test_access_control.py
# Now for the selenium and playwright/browserless fetchers
# Note - this is not UI functional tests - just checking that each one can fetch the content
echo "TESTING WEBDRIVER FETCH > SELENIUM/WEBDRIVER..."
docker run -d --name $$-test_selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome-debug:3.141.59
# takes a while to spin up
sleep 5
export WEBDRIVER_URL=http://localhost:4444/wd/hub
pytest tests/fetchers/test_content.py
pytest tests/test_errorhandling.py
unset WEBDRIVER_URL
docker kill $$-test_selenium
echo "TESTING WEBDRIVER FETCH > PLAYWRIGHT/BROWSERLESS..."
# Not all platforms support playwright (not ARM/rPI), so it's not packaged in requirements.txt
PLAYWRIGHT_VERSION=$(grep -i -E "RUN pip install.+" "$SCRIPT_DIR/../Dockerfile" | grep --only-matching -i -E "playwright[=><~+]+[0-9\.]+")
echo "using $PLAYWRIGHT_VERSION"
pip3 install "$PLAYWRIGHT_VERSION"
docker run -d --name $$-test_browserless -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm -p 3000:3000 --shm-size="2g" browserless/chrome:1.53-chrome-stable
# takes a while to spin up
sleep 5
export PLAYWRIGHT_DRIVER_URL=ws://127.0.0.1:3000
pytest tests/fetchers/test_content.py
pytest tests/test_errorhandling.py
pytest tests/visualselector/test_fetch_data.py
unset PLAYWRIGHT_DRIVER_URL
docker kill $$-test_browserless
# Test proxy list handling, starting two squids on different ports
# Each squid adds a different header to the response, which is the main thing we test for.
docker run -d --name $$-squid-one --rm -v `pwd`/tests/proxy_list/squid.conf:/etc/squid/conf.d/debian.conf -p 3128:3128 ubuntu/squid:4.13-21.10_edge
docker run -d --name $$-squid-two --rm -v `pwd`/tests/proxy_list/squid.conf:/etc/squid/conf.d/debian.conf -p 3129:3128 ubuntu/squid:4.13-21.10_edge
# So, basic HTTP as env var test
export HTTP_PROXY=http://localhost:3128
export HTTPS_PROXY=http://localhost:3128
pytest tests/proxy_list/test_proxy.py
docker logs $$-squid-one 2>/dev/null|grep one.changedetection.io
if [ $? -ne 0 ]
then
echo "Did not see a request to one.changedetection.io in the squid logs (while checking env vars HTTP_PROXY/HTTPS_PROXY)"
fi
unset HTTP_PROXY
unset HTTPS_PROXY
# 2nd test actually choose the preferred proxy from proxies.json
cp tests/proxy_list/proxies.json-example ./test-datastore/proxies.json
# Makes a watch use a preferred proxy
pytest tests/proxy_list/test_multiple_proxy.py
# Should be a request in the default "first" squid
docker logs $$-squid-one 2>/dev/null|grep chosen.changedetection.io
if [ $? -ne 0 ]
then
echo "Did not see a request to chosen.changedetection.io in the squid logs (while checking preferred proxy)"
fi
# And one in the 'second' squid (user selects this as preferred)
docker logs $$-squid-two 2>/dev/null|grep chosen.changedetection.io
if [ $? -ne 0 ]
then
echo "Did not see a request to chosen.changedetection.io in the squid logs (while checking preferred proxy)"
fi
# @todo - test system override proxy selection and watch defaults, setup a 3rd squid?
docker kill $$-squid-one
docker kill $$-squid-two

View File

@@ -0,0 +1,30 @@
#!/bin/bash
# live_server will throw errors even with live_server_scope=function if I have the live_server setup in different functions
# and I like to restart the server for each test (and have the test cleanup after each test)
# merge request welcome :)
# exit when any command fails
set -e
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
find tests/test_*py -type f|while read test_name
do
echo "TEST RUNNING $test_name"
pytest $test_name
done
echo "RUNNING WITH BASE_URL SET"
# Now re-run some tests with BASE_URL enabled
# Re #65 - Ability to include a link back to the installation, in the notification.
export BASE_URL="https://really-unique-domain.io"
pytest tests/test_notification.py
# Re-run with HIDE_REFERER set - could affect login
export HIDE_REFERER=True
pytest tests/test_access_control.py

View File

@@ -0,0 +1,61 @@
#!/bin/bash
# exit when any command fails
set -e
# Test proxy list handling, starting two squids on different ports
# Each squid adds a different header to the response, which is the main thing we test for.
docker run --network changedet-network -d --name squid-one --hostname squid-one --rm -v `pwd`/tests/proxy_list/squid.conf:/etc/squid/conf.d/debian.conf ubuntu/squid:4.13-21.10_edge
docker run --network changedet-network -d --name squid-two --hostname squid-two --rm -v `pwd`/tests/proxy_list/squid.conf:/etc/squid/conf.d/debian.conf ubuntu/squid:4.13-21.10_edge
# Used for configuring a custom proxy URL via the UI
docker run --network changedet-network -d \
--name squid-custom \
--hostname squid-custom \
--rm \
-v `pwd`/tests/proxy_list/squid-auth.conf:/etc/squid/conf.d/debian.conf \
-v `pwd`/tests/proxy_list/squid-passwords.txt:/etc/squid3/passwords \
ubuntu/squid:4.13-21.10_edge
## 2nd test actually choose the preferred proxy from proxies.json
docker run --network changedet-network \
-v `pwd`/tests/proxy_list/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json \
test-changedetectionio \
bash -c 'cd changedetectionio && pytest tests/proxy_list/test_multiple_proxy.py'
## Should be a request in the default "first" squid
docker logs squid-one 2>/dev/null|grep chosen.changedetection.io
if [ $? -ne 0 ]
then
echo "Did not see a request to chosen.changedetection.io in the squid logs (while checking preferred proxy - squid one)"
exit 1
fi
# And one in the 'second' squid (user selects this as preferred)
docker logs squid-two 2>/dev/null|grep chosen.changedetection.io
if [ $? -ne 0 ]
then
echo "Did not see a request to chosen.changedetection.io in the squid logs (while checking preferred proxy - squid two)"
exit 1
fi
# Test the UI configurable proxies
docker run --network changedet-network \
test-changedetectionio \
bash -c 'cd changedetectionio && pytest tests/proxy_list/test_select_custom_proxy.py'
# Should see a request for one.changedetection.io in there
docker logs squid-custom 2>/dev/null|grep "TCP_TUNNEL.200.*changedetection.io"
if [ $? -ne 0 ]
then
echo "Did not see a valid request to changedetection.io in the squid logs (while checking preferred proxy - squid two)"
exit 1
fi
docker kill squid-one squid-two squid-custom

View File

@@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<svg xmlns="http://www.w3.org/2000/svg" width="75.320129mm" height="92.604164mm" viewBox="0 0 75.320129 92.604164">
<g transform="translate(53.548057 -183.975276) scale(1.4843)">
<path fill="#ff2116" d="M-29.632812 123.94727c-3.551967 0-6.44336 2.89347-6.44336 6.44531v49.49804c0 3.55185 2.891393 6.44532 6.44336 6.44532H8.2167969c3.5519661 0 6.4433591-2.89335 6.4433591-6.44532v-40.70117s.101353-1.19181-.416015-2.35156c-.484969-1.08711-1.275391-1.84375-1.275391-1.84375a1.0584391 1.0584391 0 0 0-.0059-.008l-9.3906254-9.21094a1.0584391 1.0584391 0 0 0-.015625-.0156s-.8017392-.76344-1.9902344-1.27344c-1.39939552-.6005-2.8417968-.53711-2.8417968-.53711l.021484-.002z" color="#000" font-family="sans-serif" overflow="visible" paint-order="markers fill stroke" style="line-height:normal;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;text-transform:none;text-orientation:mixed;white-space:normal;shape-padding:0;isolation:auto;mix-blend-mode:normal;solid-color:#000000;solid-opacity:1"/>
<path fill="#f5f5f5" d="M-29.632812 126.06445h28.3789058a1.0584391 1.0584391 0 0 0 .021484 0s1.13480448.011 1.96484378.36719c.79889772.34282 1.36536982.86176 1.36914062.86524.0000125.00001.00391.004.00391.004l9.3671868 9.18945s.564354.59582.837891 1.20899c.220779.49491.234375 1.40039.234375 1.40039a1.0584391 1.0584391 0 0 0-.002.0449v40.74609c0 2.41592-1.910258 4.32813-4.3261717 4.32813H-29.632812c-2.415914 0-4.326172-1.91209-4.326172-4.32813v-49.49804c0-2.41603 1.910258-4.32813 4.326172-4.32813z" color="#000" font-family="sans-serif" overflow="visible" paint-order="markers fill stroke" style="line-height:normal;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;text-transform:none;text-orientation:mixed;white-space:normal;shape-padding:0;isolation:auto;mix-blend-mode:normal;solid-color:#000000;solid-opacity:1"/>
<path fill="#ff2116" d="M-23.40766 161.09299c-1.45669-1.45669.11934-3.45839 4.39648-5.58397l2.69124-1.33743 1.04845-2.29399c.57665-1.26169 1.43729-3.32036 1.91254-4.5748l.8641-2.28082-.59546-1.68793c-.73217-2.07547-.99326-5.19438-.52872-6.31588.62923-1.51909 2.69029-1.36323 3.50626.26515.63727 1.27176.57212 3.57488-.18329 6.47946l-.6193 2.38125.5455.92604c.30003.50932 1.1764 1.71867 1.9475 2.68743l1.44924 1.80272 1.8033728-.23533c5.72900399-.74758 7.6912472.523 7.6912472 2.34476 0 2.29921-4.4984914 2.48899-8.2760865-.16423-.8499666-.59698-1.4336605-1.19001-1.4336605-1.19001s-2.3665326.48178-3.531704.79583c-1.202707.32417-1.80274.52719-3.564509 1.12186 0 0-.61814.89767-1.02094 1.55026-1.49858 2.4279-3.24833 4.43998-4.49793 5.1723-1.3991.81993-2.86584.87582-3.60433.13733zm2.28605-.81668c.81883-.50607 2.47616-2.46625 3.62341-4.28553l.46449-.73658-2.11497 1.06339c-3.26655 1.64239-4.76093 3.19033-3.98386 4.12664.43653.52598.95874.48237 2.01093-.16792zm21.21809-5.95578c.80089-.56097.68463-1.69142-.22082-2.1472-.70466-.35471-1.2726074-.42759-3.1031574-.40057-1.1249.0767-2.9337647.3034-3.2403347.37237 0 0 .993716.68678 1.434896.93922.58731.33544 2.0145161.95811 3.0565161 1.27706 1.02785.31461 1.6224.28144 2.0729-.0409zm-8.53152-3.54594c-.4847-.50952-1.30889-1.57296-1.83152-2.3632-.68353-.89643-1.02629-1.52887-1.02629-1.52887s-.4996 1.60694-.90948 2.57394l-1.27876 3.16076-.37075.71695s1.971043-.64627 2.97389-.90822c1.0621668-.27744 3.21787-.70134 3.21787-.70134zm-2.74938-11.02573c.12363-1.0375.1761-2.07346-.15724-2.59587-.9246-1.01077-2.04057-.16787-1.85154 2.23517.0636.8084.26443 2.19033.53292 3.04209l.48817 1.54863.34358-1.16638c.18897-.64151.47882-2.02015.64411-3.06364z"/>
<path fill="#2c2c2c" d="M-20.930423 167.83862h2.364986q1.133514 0 1.840213.2169.706698.20991 1.189489.9446.482795.72769.482795 1.75625 0 .94459-.391832 1.6233-.391833.67871-1.056548.97958-.65772.30087-2.02913.30087h-.818651v3.72941h-1.581322zm1.581322 1.22447v3.33058h.783664q1.049552 0 1.44838-.39184.405826-.39183.405826-1.27345 0-.65772-.265887-1.06355-.265884-.41282-.587747-.50378-.314866-.098-1.000572-.098zm5.50664-1.22447h2.148082q1.560333 0 2.4909318.55276.9375993.55276 1.4133973 1.6443.482791 1.09153.482791 2.42096 0 1.3994-.4338151 2.49793-.4268149 1.09153-1.3154348 1.76324-.8816233.67172-2.5189212.67172h-2.267031zm1.581326 1.26645v7.018h.657715q1.378411 0 2.001144-.9516.6227329-.95858.6227329-2.5539 0-3.5125-2.6238769-3.5125zm6.4722254-1.26645h5.30372941v1.26645H-4.2075842v2.85478h2.9807225v1.26646h-2.9807225v4.16322h-1.5813254z" font-family="Franklin Gothic Medium Cond" letter-spacing="0" style="line-height:125%;-inkscape-font-specification:'Franklin Gothic Medium Cond'" word-spacing="4.26000023"/>
</g>
</svg>

After

Width:  |  Height:  |  Size: 5.0 KiB

View File

@@ -0,0 +1,3 @@
node_modules
package-lock.json

View File

@@ -0,0 +1,17 @@
ul#requests-extra_proxies {
list-style: none;
/* tidy up the table to look more "inline" */
li {
> label {
display: none;
}
}
/* each proxy entry is a `table` */
table {
tr {
display: inline;
}
}
}

View File

@@ -2,10 +2,11 @@
* -- BASE STYLES --
*/
@import "parts/_variables";
@import "parts/_spinners";
@import "parts/_browser-steps";
@import "parts/_arrows";
@import "parts/_browser-steps";
@import "parts/_extra_proxies";
@import "parts/_spinners";
@import "parts/_variables";
body {
color: var(--color-text);
@@ -22,6 +23,13 @@ body {
width: 1px;
}
// Row icons like chrome, pdf, share, etc
.status-icon {
display: inline-block;
height: 1rem;
vertical-align: middle;
}
.pure-table-even {
background: var(--color-background);
}

View File

@@ -1,6 +1,139 @@
/*
* -- BASE STYLES --
*/
.arrow {
border: solid #1b98f8;
border-width: 0 2px 2px 0;
display: inline-block;
padding: 3px; }
.arrow.right {
transform: rotate(-45deg);
-webkit-transform: rotate(-45deg); }
.arrow.left {
transform: rotate(135deg);
-webkit-transform: rotate(135deg); }
.arrow.up, .arrow.asc {
transform: rotate(-135deg);
-webkit-transform: rotate(-135deg); }
.arrow.down, .arrow.desc {
transform: rotate(45deg);
-webkit-transform: rotate(45deg); }
#browser_steps {
/* convert rows to horizontal cells */ }
#browser_steps th {
display: none; }
#browser_steps li {
list-style: decimal;
padding: 5px; }
#browser_steps li:not(:first-child):hover {
opacity: 1.0; }
#browser_steps li .control {
padding-left: 5px;
padding-right: 5px; }
#browser_steps li .control a {
font-size: 70%; }
#browser_steps li.empty {
padding: 0px;
opacity: 0.35; }
#browser_steps li.empty .control {
display: none; }
#browser_steps li:hover {
background: #eee; }
#browser_steps li > label {
display: none; }
#browser-steps-fieldlist {
height: 100%;
overflow-y: scroll; }
#browser-steps .flex-wrapper {
display: flex;
flex-flow: row;
height: 600px;
/*@todo make this dynamic */ }
/* this is duplicate :( */
#browsersteps-selector-wrapper {
height: 100%;
width: 100%;
overflow-y: scroll;
position: relative;
/* nice tall skinny one */ }
#browsersteps-selector-wrapper > img {
position: absolute;
max-width: 100%; }
#browsersteps-selector-wrapper > canvas {
position: relative;
max-width: 100%; }
#browsersteps-selector-wrapper > canvas:hover {
cursor: pointer; }
#browsersteps-selector-wrapper .loader {
position: absolute;
left: 50%;
top: 50%;
transform: translate(-50%, -50%);
margin-left: -40px;
z-index: 100;
max-width: 350px;
text-align: center; }
#browsersteps-selector-wrapper .spinner, #browsersteps-selector-wrapper .spinner:after {
width: 80px;
height: 80px;
font-size: 3px; }
#browsersteps-selector-wrapper #browsersteps-click-start {
color: var(--color-grey-400); }
#browsersteps-selector-wrapper #browsersteps-click-start:hover {
cursor: pointer; }
ul#requests-extra_proxies {
list-style: none;
/* tidy up the table to look more "inline" */
/* each proxy entry is a `table` */ }
ul#requests-extra_proxies li > label {
display: none; }
ul#requests-extra_proxies table tr {
display: inline; }
/* spinner */
.spinner,
.spinner:after {
border-radius: 50%;
width: 10px;
height: 10px; }
.spinner {
margin: 0px auto;
font-size: 3px;
vertical-align: middle;
display: inline-block;
text-indent: -9999em;
border-top: 1.1em solid rgba(38, 104, 237, 0.2);
border-right: 1.1em solid rgba(38, 104, 237, 0.2);
border-bottom: 1.1em solid rgba(38, 104, 237, 0.2);
border-left: 1.1em solid #2668ed;
-webkit-transform: translateZ(0);
-ms-transform: translateZ(0);
transform: translateZ(0);
-webkit-animation: load8 1.1s infinite linear;
animation: load8 1.1s infinite linear; }
@-webkit-keyframes load8 {
0% {
-webkit-transform: rotate(0deg);
transform: rotate(0deg); }
100% {
-webkit-transform: rotate(360deg);
transform: rotate(360deg); } }
@keyframes load8 {
0% {
-webkit-transform: rotate(0deg);
transform: rotate(0deg); }
100% {
-webkit-transform: rotate(360deg);
transform: rotate(360deg); } }
/**
* CSS custom properties (aka variables).
*/
@@ -138,130 +271,6 @@ html[data-darkmode="true"] {
html[data-darkmode="true"] .watch-table .unviewed.error {
color: var(--color-watch-table-error); }
/* spinner */
.spinner,
.spinner:after {
border-radius: 50%;
width: 10px;
height: 10px; }
.spinner {
margin: 0px auto;
font-size: 3px;
vertical-align: middle;
display: inline-block;
text-indent: -9999em;
border-top: 1.1em solid rgba(38, 104, 237, 0.2);
border-right: 1.1em solid rgba(38, 104, 237, 0.2);
border-bottom: 1.1em solid rgba(38, 104, 237, 0.2);
border-left: 1.1em solid #2668ed;
-webkit-transform: translateZ(0);
-ms-transform: translateZ(0);
transform: translateZ(0);
-webkit-animation: load8 1.1s infinite linear;
animation: load8 1.1s infinite linear; }
@-webkit-keyframes load8 {
0% {
-webkit-transform: rotate(0deg);
transform: rotate(0deg); }
100% {
-webkit-transform: rotate(360deg);
transform: rotate(360deg); } }
@keyframes load8 {
0% {
-webkit-transform: rotate(0deg);
transform: rotate(0deg); }
100% {
-webkit-transform: rotate(360deg);
transform: rotate(360deg); } }
#browser_steps {
/* convert rows to horizontal cells */ }
#browser_steps th {
display: none; }
#browser_steps li {
list-style: decimal;
padding: 5px; }
#browser_steps li:not(:first-child):hover {
opacity: 1.0; }
#browser_steps li .control {
padding-left: 5px;
padding-right: 5px; }
#browser_steps li .control a {
font-size: 70%; }
#browser_steps li.empty {
padding: 0px;
opacity: 0.35; }
#browser_steps li.empty .control {
display: none; }
#browser_steps li:hover {
background: #eee; }
#browser_steps li > label {
display: none; }
#browser-steps-fieldlist {
height: 100%;
overflow-y: scroll; }
#browser-steps .flex-wrapper {
display: flex;
flex-flow: row;
height: 600px;
/*@todo make this dynamic */ }
/* this is duplicate :( */
#browsersteps-selector-wrapper {
height: 100%;
width: 100%;
overflow-y: scroll;
position: relative;
/* nice tall skinny one */ }
#browsersteps-selector-wrapper > img {
position: absolute;
max-width: 100%; }
#browsersteps-selector-wrapper > canvas {
position: relative;
max-width: 100%; }
#browsersteps-selector-wrapper > canvas:hover {
cursor: pointer; }
#browsersteps-selector-wrapper .loader {
position: absolute;
left: 50%;
top: 50%;
transform: translate(-50%, -50%);
margin-left: -40px;
z-index: 100;
max-width: 350px;
text-align: center; }
#browsersteps-selector-wrapper .spinner, #browsersteps-selector-wrapper .spinner:after {
width: 80px;
height: 80px;
font-size: 3px; }
#browsersteps-selector-wrapper #browsersteps-click-start {
color: var(--color-grey-400); }
#browsersteps-selector-wrapper #browsersteps-click-start:hover {
cursor: pointer; }
.arrow {
border: solid #1b98f8;
border-width: 0 2px 2px 0;
display: inline-block;
padding: 3px; }
.arrow.right {
transform: rotate(-45deg);
-webkit-transform: rotate(-45deg); }
.arrow.left {
transform: rotate(135deg);
-webkit-transform: rotate(135deg); }
.arrow.up, .arrow.asc {
transform: rotate(-135deg);
-webkit-transform: rotate(-135deg); }
.arrow.down, .arrow.desc {
transform: rotate(45deg);
-webkit-transform: rotate(45deg); }
body {
color: var(--color-text);
background: var(--color-background-page); }
@@ -275,6 +284,11 @@ body {
white-space: nowrap;
width: 1px; }
.status-icon {
display: inline-block;
height: 1rem;
vertical-align: middle; }
.pure-table-even {
background: var(--color-background); }

View File

@@ -36,7 +36,6 @@ class ChangeDetectionStore:
self.datastore_path = datastore_path
self.json_store_path = "{}/url-watches.json".format(self.datastore_path)
self.needs_write = False
self.proxy_list = None
self.start_time = time.time()
self.stop_thread = False
# Base definition for all watchers
@@ -78,10 +77,10 @@ class ChangeDetectionStore:
self.__data['watching'][uuid] = Watch.model(datastore_path=self.datastore_path, default=watch)
print("Watching:", uuid, self.__data['watching'][uuid]['url'])
# First time ran, doesnt exist.
except (FileNotFoundError, json.decoder.JSONDecodeError):
# First time ran, Create the datastore.
except (FileNotFoundError):
if include_default_watches:
print("Creating JSON store at", self.datastore_path)
print("No JSON DB found at {}, creating JSON store at {}".format(self.json_store_path, self.datastore_path))
self.add_watch(url='https://news.ycombinator.com/',
tag='Tech news',
extras={'fetch_backend': 'html_requests'})
@@ -89,9 +88,11 @@ class ChangeDetectionStore:
self.add_watch(url='https://changedetection.io/CHANGELOG.txt',
tag='changedetection.io',
extras={'fetch_backend': 'html_requests'})
self.__data['version_tag'] = version_tag
# Just to test that proxies.json if it exists, doesnt throw a parsing error on startup
test_list = self.proxy_list
# Helper to remove password protection
password_reset_lockfile = "{}/removepassword.lock".format(self.datastore_path)
if path.isfile(password_reset_lockfile):
@@ -116,11 +117,6 @@ class ChangeDetectionStore:
secret = secrets.token_hex(16)
self.__data['settings']['application']['api_access_token'] = secret
# Proxy list support - available as a selection in settings when text file is imported
proxy_list_file = "{}/proxies.json".format(self.datastore_path)
if path.isfile(proxy_list_file):
self.import_proxy_list(proxy_list_file)
# Bump the update version by running updates
self.run_updates()
@@ -463,10 +459,30 @@ class ChangeDetectionStore:
print ("Removing",item)
unlink(item)
def import_proxy_list(self, filename):
with open(filename) as f:
self.proxy_list = json.load(f)
print ("Registered proxy list", list(self.proxy_list.keys()))
@property
def proxy_list(self):
proxy_list = {}
proxy_list_file = os.path.join(self.datastore_path, 'proxies.json')
# Load from external config file
if path.isfile(proxy_list_file):
with open("{}/proxies.json".format(self.datastore_path)) as f:
proxy_list = json.load(f)
# Mapping from UI config if available
extras = self.data['settings']['requests'].get('extra_proxies')
if extras:
i=0
for proxy in extras:
i += 0
if proxy.get('proxy_name') and proxy.get('proxy_url'):
k = "ui-" + str(i) + proxy.get('proxy_name')
proxy_list[k] = {'label': proxy.get('proxy_name'), 'url': proxy.get('proxy_url')}
return proxy_list if len(proxy_list) else None
def get_preferred_proxy_for_watch(self, uuid):
@@ -476,11 +492,10 @@ class ChangeDetectionStore:
:return: proxy "key" id
"""
proxy_id = None
if self.proxy_list is None:
return None
# If its a valid one
# If it's a valid one
watch = self.data['watching'].get(uuid)
if watch.get('proxy') and watch.get('proxy') in list(self.proxy_list.keys()):
@@ -493,8 +508,9 @@ class ChangeDetectionStore:
if self.proxy_list.get(system_proxy_id):
return system_proxy_id
# Fallback - Did not resolve anything, use the first available
if system_proxy_id is None:
# Fallback - Did not resolve anything, or doesnt exist, use the first available
if system_proxy_id is None or not self.proxy_list.get(system_proxy_id):
first_default = list(self.proxy_list)[0]
return first_default

View File

@@ -21,6 +21,7 @@
<li class="tab"><a href="#fetching">Fetching</a></li>
<li class="tab"><a href="#filters">Global Filters</a></li>
<li class="tab"><a href="#api">API</a></li>
<li class="tab"><a href="#proxies">CAPTCHA &amp; Proxies</a></li>
</ul>
</div>
<div class="box-wrap inner">
@@ -170,14 +171,29 @@ nav
</div>
</div>
</div>
<div class="tab-pane-inner" id="proxies">
<p><strong>Tip</strong>: You can connect to websites using <a href="https://brightdata.grsm.io/n0r16zf7eivq">BrightData</a> proxies, their service <strong>WebUnlocker</strong> will solve most CAPTCHAs, whilst their <strong>Residential Proxies</strong> may help to avoid CAPTCHA altogether. </p>
<p>It may be easier to try <strong>WebUnlocker</strong> first, WebUnlocker also supports country selection.</p>
<p>
When you have <a href="https://brightdata.grsm.io/n0r16zf7eivq">registered</a>, enabled the required services, visit the <A href="https://brightdata.com/cp/api_example?">API example page</A>, then select <strong>Python</strong>, set the country you wish to use, then copy+paste the example URL below<br/>
The Proxy URL with BrightData should start with <code>http://brd-customer...</code>
</p>
<p>When you sign up using <a href="https://brightdata.grsm.io/n0r16zf7eivq">https://brightdata.grsm.io/n0r16zf7eivq</a> BrightData will match any first deposit up to $150</p>
<div class="pure-control-group">
{{ render_field(form.requests.form.extra_proxies) }}
<span class="pure-form-message-inline">"Name" will be used for selecting the proxy in the Watch Edit settings</span>
</div>
</div>
<div id="actions">
<div class="pure-control-group">
{{ render_button(form.save_button) }}
<a href="{{url_for('index')}}" class="pure-button button-small button-cancel">Back</a>
<a href="{{url_for('clear_all_history')}}" class="pure-button button-small button-cancel">Clear Snapshot History</a>
</div>
</div>
</form>
</div>

View File

@@ -89,12 +89,21 @@
</td>
<td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}}
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"></a>
<a class="link-spread" href="{{url_for('form_share_put_watch', uuid=watch.uuid)}}"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="icon icon-spread" title="Create a link to share watch config with others" /></a>
{%if watch.fetch_backend == "html_webdriver" %}<img style="height: 1em; display:inline-block;" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" title="Using a chrome browser" />{% endif %}
<a class="link-spread" href="{{url_for('form_share_put_watch', uuid=watch.uuid)}}"><img class="status-icon" src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" /></a>
{%if watch.get_fetch_backend == "html_webdriver" %}<img class="status-icon" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" title="Using a chrome browser" />{% endif %}
{%if watch.is_pdf %}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" title="Converting PDF to text" />{% endif %}
{% if watch.last_error is defined and watch.last_error != False %}
<div class="fetch-error">{{ watch.last_error }}</div>
<div class="fetch-error">{{ watch.last_error }}
{% if '403' in watch.last_error %}
{% if has_proxies %}
<a href="{{ url_for('settings_page', uuid=watch.uuid) }}#proxies">Try other proxies/location</a>&nbsp;
{% endif %}
<a href="{{ url_for('settings_page', uuid=watch.uuid) }}#proxies">Try adding external proxies/locations</a>
{% endif %}
</div>
{% endif %}
{% if watch.last_notification_error is defined and watch.last_notification_error != False %}
<div class="fetch-error notification-error"><a href="{{url_for('notification_logs')}}">{{ watch.last_notification_error }}</a></div>
@@ -103,7 +112,7 @@
<div class="ldjson-price-track-offer">Embedded price data detected, follow only price data? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
{% endif %}
{% if watch['track_ldjson_price_data'] == 'accepted' %}
<span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}" class="price-follow-tag-icon"/> Price</span>
<span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}" class="status-icon price-follow-tag-icon"/> Price</span>
{% endif %}
{% if not active_tag %}
<span class="watch-tag-list">{{ watch.tag}}</span>

View File

@@ -1,10 +1,10 @@
{
"proxy-one": {
"label": "One",
"url": "http://127.0.0.1:3128"
"label": "Proxy One",
"url": "http://squid-one:3128"
},
"proxy-two": {
"label": "two",
"url": "http://127.0.0.1:3129"
"label": "Proxy Two",
"url": "http://squid-two:3128"
}
}

View File

@@ -0,0 +1,48 @@
acl localnet src 0.0.0.1-0.255.255.255 # RFC 1122 "this" network (LAN)
acl localnet src 10.0.0.0/8 # RFC 1918 local private network (LAN)
acl localnet src 100.64.0.0/10 # RFC 6598 shared address space (CGN)
acl localnet src 169.254.0.0/16 # RFC 3927 link-local (directly plugged) machines
acl localnet src 172.16.0.0/12 # RFC 1918 local private network (LAN)
acl localnet src 192.168.0.0/16 # RFC 1918 local private network (LAN)
acl localnet src fc00::/7 # RFC 4193 local private network range
acl localnet src fe80::/10 # RFC 4291 link-local (directly plugged) machines
acl localnet src 159.65.224.174
acl SSL_ports port 443
acl Safe_ports port 80 # http
acl Safe_ports port 21 # ftp
acl Safe_ports port 443 # https
acl Safe_ports port 70 # gopher
acl Safe_ports port 210 # wais
acl Safe_ports port 1025-65535 # unregistered ports
acl Safe_ports port 280 # http-mgmt
acl Safe_ports port 488 # gss-http
acl Safe_ports port 591 # filemaker
acl Safe_ports port 777 # multiling http
acl CONNECT method CONNECT
http_access deny !Safe_ports
http_access deny CONNECT !SSL_ports
#http_access allow localhost manager
http_access deny manager
#http_access allow localhost
#http_access allow localnet
auth_param basic program /usr/lib/squid3/basic_ncsa_auth /etc/squid3/passwords
auth_param basic realm proxy
acl authenticated proxy_auth REQUIRED
http_access allow authenticated
http_access deny all
http_port 3128
coredump_dir /var/spool/squid
refresh_pattern ^ftp: 1440 20% 10080
refresh_pattern ^gopher: 1440 0% 1440
refresh_pattern -i (/cgi-bin/|\?) 0 0% 0
refresh_pattern \/(Packages|Sources)(|\.bz2|\.gz|\.xz)$ 0 0% 0 refresh-ims
refresh_pattern \/Release(|\.gpg)$ 0 0% 0 refresh-ims
refresh_pattern \/InRelease$ 0 0% 0 refresh-ims
refresh_pattern \/(Translation-.*)(|\.bz2|\.gz|\.xz)$ 0 0% 0 refresh-ims
refresh_pattern . 0 20% 4320
logfile_rotate 0

View File

@@ -0,0 +1 @@
test:$apr1$xvhFolTA$E/kz5/Rw1ewcyaSUdwqZs.

View File

@@ -0,0 +1,50 @@
#!/usr/bin/python3
import time
from flask import url_for
from ..util import live_server_setup, wait_for_all_checks
# just make a request, we will grep in the docker logs to see it actually got called
def test_select_custom(client, live_server):
live_server_setup(live_server)
# Goto settings, add our custom one
res = client.post(
url_for("settings_page"),
data={
"requests-time_between_check-minutes": 180,
"application-ignore_whitespace": "y",
"application-fetch_backend": "html_requests",
"requests-extra_proxies-0-proxy_name": "custom-test-proxy",
# test:awesome is set in tests/proxy_list/squid-passwords.txt
"requests-extra_proxies-0-proxy_url": "http://test:awesome@squid-custom:3128",
},
follow_redirects=True
)
assert b"Settings updated." in res.data
res = client.post(
url_for("import_page"),
# Because a URL wont show in squid/proxy logs due it being SSLed
# Use plain HTTP or a specific domain-name here
data={"urls": "https://changedetection.io/CHANGELOG.txt"},
follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)
res = client.get(url_for("index"))
assert b'Proxy Authentication Required' not in res.data
res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)
# We should see something via proxy
assert b'<div class=""> - 0.' in res.data
#
# Now we should see the request in the container logs for "squid-squid-custom" because it will be the only default

Binary file not shown.

View File

@@ -67,7 +67,7 @@ def test_check_basic_change_detection_functionality(client, live_server):
# Force recheck
res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
assert b'1 watches are queued for rechecking.' in res.data
assert b'1 watches queued for rechecking.' in res.data
wait_for_all_checks(client)

View File

@@ -1,8 +1,7 @@
import os
import time
import re
from flask import url_for
from .util import set_original_response, live_server_setup
from .util import set_original_response, live_server_setup, extract_UUID_from_client
from changedetectionio.model import App
@@ -121,6 +120,10 @@ def run_filter_test(client, content_filter):
notification = f.read()
assert not 'CSS/xPath filter was not present in the page' in notification
# Re #1247 - All tokens got replaced
uuid = extract_UUID_from_client(client)
assert uuid in notification
# cleanup for the next
client.get(
url_for("form_delete", uuid="all"),

View File

@@ -0,0 +1,40 @@
#!/usr/bin/python3
import time
from flask import url_for
from .util import set_original_response, set_modified_response, live_server_setup
sleep_time_for_fetch_thread = 3
# `subtractive_selectors` should still work in `source:` type requests
def test_fetch_pdf(client, live_server):
import shutil
shutil.copy("tests/test.pdf", "test-datastore/endpoint-test.pdf")
live_server_setup(live_server)
test_url = url_for('test_pdf_endpoint', _external=True)
# Add our URL to the import page
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
time.sleep(sleep_time_for_fetch_thread)
res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)
assert b'PDF-1.5' not in res.data
assert b'hello world' in res.data
# So we know if the file changes in other ways
import hashlib
md5 = hashlib.md5(open("test-datastore/endpoint-test.pdf", 'rb').read()).hexdigest().upper()
# We should have one
assert len(md5) >0
# And it's going to be in the document
assert b'Document checksum - '+bytes(str(md5).encode('utf-8')) in res.data

View File

@@ -40,7 +40,7 @@ def test_check_basic_change_detection_functionality_source(client, live_server):
# Force recheck
res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
assert b'1 watches are queued for rechecking.' in res.data
assert b'1 watches queued for rechecking.' in res.data
time.sleep(5)
@@ -90,4 +90,4 @@ def test_check_ignore_elements(client, live_server):
)
assert b'foobar-detection' not in res.data
assert b'&lt;br' not in res.data
assert b'&lt;p' in res.data
assert b'&lt;p' in res.data

View File

@@ -168,5 +168,15 @@ def live_server_setup(live_server):
def test_return_query():
return request.query_string
@live_server.app.route('/endpoint-test.pdf')
def test_pdf_endpoint():
# Tried using a global var here but didn't seem to work, so reading from a file instead.
with open("test-datastore/endpoint-test.pdf", "rb") as f:
resp = make_response(f.read(), 200)
resp.headers['Content-Type'] = 'application/pdf'
return resp
live_server.start()

View File

@@ -93,7 +93,7 @@ class update_worker(threading.Thread):
return
n_object = {'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page',
'notification_body': "Your configured CSS/xPath filters of '{}' for {{watch_url}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{base_url}}/edit/{{watch_uuid}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format(
'notification_body': "Your configured CSS/xPath filters of '{}' for {{{{watch_url}}}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format(
", ".join(watch['include_filters']),
threshold),
'notification_format': 'text'}

View File

@@ -1,4 +1,3 @@
version: '2'
services:
changedetection:
image: ghcr.io/dgtlmoon/changedetection.io

View File

@@ -1,2 +0,0 @@
pytest ~=6.2
pytest-flask ~=1.2

View File

@@ -24,7 +24,7 @@ jsonpath-ng~=1.5.3
# jq not available on Windows so must be installed manually
# Notification library
apprise~=1.2.0
apprise~=1.2.1
# apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315
paho-mqtt
@@ -59,3 +59,7 @@ jq~=1.3 ;python_version >= "3.8" and sys_platform == "linux"
# Any current modern version, required so far for screenshot PNG->JPEG conversion but will be used more in the future
pillow
# playwright is installed at Dockerfile build time because it's not available on all platforms
# Include pytest, so if theres a support issue we can ask them to run these tests on their setup
pytest ~=6.2
pytest-flask ~=1.2