Compare commits

..

3 Commits

Author SHA1 Message Date
dgtlmoon
8578cc3582 Fix tuple 2022-10-09 18:10:24 +02:00
dgtlmoon
b72d6f8dec use brotli package 2022-10-09 18:10:14 +02:00
dgtlmoon
5b3f240846 Dont use default Requests user-agent and accept headers in playwright+selenium requests, breaks sites such as united.com. 2022-10-09 17:54:13 +02:00
11 changed files with 44 additions and 135 deletions

View File

@@ -1,46 +0,0 @@
name: ChangeDetection.io Container Build Test
# Triggers the workflow on push or pull request events
on:
push:
paths:
- requirements.txt
- Dockerfile
# Changes to requirements.txt packages and Dockerfile may or may not always be compatible with arm etc, so worth testing
# @todo: some kind of path filter for requirements.txt and Dockerfile
jobs:
test-container-build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.9
uses: actions/setup-python@v2
with:
python-version: 3.9
# Just test that the build works, some libraries won't compile on ARM/rPi etc
- name: Set up QEMU
uses: docker/setup-qemu-action@v1
with:
image: tonistiigi/binfmt:latest
platforms: all
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@v1
with:
install: true
version: latest
driver-opts: image=moby/buildkit:master
- name: Test that the docker containers can build
id: docker_build
uses: docker/build-push-action@v2
# https://github.com/docker/build-push-action#customizing
with:
context: ./
file: ./Dockerfile
platforms: linux/arm/v7,linux/arm/v6,linux/amd64,linux/arm64,
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache

View File

@@ -1,25 +1,28 @@
name: ChangeDetection.io App Test name: ChangeDetection.io Test
# Triggers the workflow on push or pull request events # Triggers the workflow on push or pull request events
on: [push, pull_request] on: [push, pull_request]
jobs: jobs:
test-application: test-build:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v2 - uses: actions/checkout@v2
- name: Set up Python 3.9 - name: Set up Python 3.9
uses: actions/setup-python@v2 uses: actions/setup-python@v2
with: with:
python-version: 3.9 python-version: 3.9
- name: Show env vars
run: set
- name: Install dependencies - name: Install dependencies
run: | run: |
python -m pip install --upgrade pip python -m pip install --upgrade pip
pip install flake8 pytest pip install flake8 pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi
- name: Lint with flake8 - name: Lint with flake8
run: | run: |
# stop the build if there are Python syntax errors or undefined names # stop the build if there are Python syntax errors or undefined names
@@ -36,4 +39,7 @@ jobs:
# Each test is totally isolated and performs its own cleanup/reset # Each test is totally isolated and performs its own cleanup/reset
cd changedetectionio; ./run_all_tests.sh cd changedetectionio; ./run_all_tests.sh
# https://github.com/docker/build-push-action/blob/master/docs/advanced/test-before-push.md ?
# https://github.com/docker/buildx/issues/59 ? Needs to be one platform?
# https://github.com/docker/buildx/issues/495#issuecomment-918925854

View File

@@ -5,14 +5,13 @@ FROM python:3.8-slim as builder
ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1 ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1
RUN apt-get update && apt-get install -y --no-install-recommends \ RUN apt-get update && apt-get install -y --no-install-recommends \
g++ \ libssl-dev \
libffi-dev \
gcc \ gcc \
libc-dev \ libc-dev \
libffi-dev \
libssl-dev \
libxslt-dev \ libxslt-dev \
make \ zlib1g-dev \
zlib1g-dev g++
RUN mkdir /install RUN mkdir /install
WORKDIR /install WORKDIR /install
@@ -26,11 +25,6 @@ RUN pip install --target=/dependencies -r /requirements.txt
RUN pip install --target=/dependencies playwright~=1.26 \ RUN pip install --target=/dependencies playwright~=1.26 \
|| echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled." || echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."
RUN pip install --target=/dependencies jq~=1.3 \
|| echo "WARN: Failed to install JQ. The application can still run, but the Jq: filter option will be disabled."
# Final image stage # Final image stage
FROM python:3.8-slim FROM python:3.8-slim

View File

@@ -121,8 +121,8 @@ See the wiki for more information https://github.com/dgtlmoon/changedetection.io
## Filters ## Filters
XPath, JSONPath, jq, and CSS support comes baked in! You can be as specific as you need, use XPath exported from various XPath element query creation tools.
XPath, JSONPath, jq, and CSS support comes baked in! You can be as specific as you need, use XPath exported from various XPath element query creation tools.
(We support LXML `re:test`, `re:math` and `re:replace`.) (We support LXML `re:test`, `re:math` and `re:replace`.)
## Notifications ## Notifications
@@ -163,11 +163,7 @@ This will re-parse the JSON and apply formatting to the text, making it super ea
For more complex parsing, filtering, and modifying of JSON data, jq is recommended due to the built-in operators and functions. Refer to the [documentation](https://stedolan.github.io/jq/manual/) for more information on jq. For more complex parsing, filtering, and modifying of JSON data, jq is recommended due to the built-in operators and functions. Refer to the [documentation](https://stedolan.github.io/jq/manual/) for more information on jq.
Notes: The example below adds the price in dollars to each item in the JSON data, and then filters to only show items that are greater than 10.
- `jq` must be added manually separately from the installation of changedetection.io (simply run `pip3 install jq`)
- `jq` is not available on Windows or must be manually compiled (No "wheel" package available on pypi)
- The example below adds the price in dollars to each item in the JSON data, and then filters to only show items that are greater than 10.
#### Sample input data from API #### Sample input data from API
``` ```

View File

@@ -33,7 +33,7 @@ from flask_wtf import CSRFProtect
from changedetectionio import html_tools from changedetectionio import html_tools
from changedetectionio.api import api_v1 from changedetectionio.api import api_v1
__version__ = '0.39.20.1' __version__ = '0.39.20'
datastore = None datastore = None
@@ -636,27 +636,20 @@ def changedetection_app(config=None, datastore_o=None):
# Only works reliably with Playwright # Only works reliably with Playwright
visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and default['fetch_backend'] == 'html_webdriver' visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and default['fetch_backend'] == 'html_webdriver'
# JQ is difficult to install on windows and must be manually added (outside requirements.txt)
jq_support = True
try:
import jq
except ModuleNotFoundError:
jq_support = False
output = render_template("edit.html", output = render_template("edit.html",
uuid=uuid,
watch=datastore.data['watching'][uuid],
form=form,
has_empty_checktime=using_default_check_time,
has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
using_global_webdriver_wait=default['webdriver_delay'] is None,
current_base_url=datastore.data['settings']['application']['base_url'], current_base_url=datastore.data['settings']['application']['base_url'],
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False), emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
form=form,
has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
has_empty_checktime=using_default_check_time,
jq_support=jq_support,
playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False),
settings_application=datastore.data['settings']['application'], settings_application=datastore.data['settings']['application'],
using_global_webdriver_wait=default['webdriver_delay'] is None,
uuid=uuid,
visualselector_data_is_ready=visualselector_data_is_ready, visualselector_data_is_ready=visualselector_data_is_ready,
visualselector_enabled=visualselector_enabled, visualselector_enabled=visualselector_enabled,
watch=datastore.data['watching'][uuid], playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False)
) )
return output return output

View File

@@ -303,16 +303,12 @@ class ValidateCSSJSONXPATHInput(object):
# Re #265 - maybe in the future fetch the page and offer a # Re #265 - maybe in the future fetch the page and offer a
# warning/notice that its possible the rule doesnt yet match anything? # warning/notice that its possible the rule doesnt yet match anything?
if 'jq:' in line:
if not self.allow_json: if not self.allow_json:
raise ValidationError("jq not permitted in this field!") raise ValidationError("jq not permitted in this field!")
if 'jq:' in line: import jq
try:
import jq
except ModuleNotFoundError:
# `jq` requires full compilation in windows and so isn't generally available
raise ValidationError("jq not support not found")
input = line.replace('jq:', '') input = line.replace('jq:', '')
try: try:

View File

@@ -1,11 +1,12 @@
import json
from typing import List
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from jsonpath_ng.ext import parse
import jq
import re
from inscriptis import get_text from inscriptis import get_text
from inscriptis.model.config import ParserConfig from inscriptis.model.config import ParserConfig
from jsonpath_ng.ext import parse
from typing import List
import json
import re
class FilterNotFoundInResponse(ValueError): class FilterNotFoundInResponse(ValueError):
def __init__(self, msg): def __init__(self, msg):
@@ -84,18 +85,9 @@ def _parse_json(json_data, json_filter):
jsonpath_expression = parse(json_filter.replace('json:', '')) jsonpath_expression = parse(json_filter.replace('json:', ''))
match = jsonpath_expression.find(json_data) match = jsonpath_expression.find(json_data)
return _get_stripped_text_from_json_match(match) return _get_stripped_text_from_json_match(match)
if 'jq:' in json_filter: if 'jq:' in json_filter:
try:
import jq
except ModuleNotFoundError:
# `jq` requires full compilation in windows and so isn't generally available
raise Exception("jq not support not found")
jq_expression = jq.compile(json_filter.replace('jq:', '')) jq_expression = jq.compile(json_filter.replace('jq:', ''))
match = jq_expression.input(json_data).all() match = jq_expression.input(json_data).all()
return _get_stripped_text_from_json_match(match) return _get_stripped_text_from_json_match(match)
def _get_stripped_text_from_json_match(match): def _get_stripped_text_from_json_match(match):

View File

@@ -23,13 +23,6 @@ export BASE_URL="https://really-unique-domain.io"
pytest tests/test_notification.py pytest tests/test_notification.py
## JQ + JSON: filter test
# jq is not available on windows and we should just test it when the package is installed
# this will re-test with jq support
pip3 install jq~=1.3
pytest tests/test_jsonpath_jq_selector.py
# Now for the selenium and playwright/browserless fetchers # Now for the selenium and playwright/browserless fetchers
# Note - this is not UI functional tests - just checking that each one can fetch the content # Note - this is not UI functional tests - just checking that each one can fetch the content

View File

@@ -184,14 +184,10 @@ User-Agent: wonderbra 1.0") }}
<span class="pure-form-message-inline"> <span class="pure-form-message-inline">
<ul> <ul>
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li> <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
<li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed). <li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a>.
<ul> <ul>
<li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required, <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li> <li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required, <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li>
{% if jq_support %}
<li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>.</li> <li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>.</li>
{% else %}
<li>jq support not installed</li>
{% endif %}
</ul> </ul>
</li> </li>
<li>XPath - Limit text to this XPath rule, simply start with a forward-slash, <li>XPath - Limit text to this XPath rule, simply start with a forward-slash,
@@ -202,7 +198,7 @@ User-Agent: wonderbra 1.0") }}
</ul> </ul>
</li> </li>
</ul> </ul>
Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath, or jq selector rules before filing an issue on GitHub! <a
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br/> href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br/>
</span> </span>
</div> </div>

View File

@@ -5,12 +5,7 @@ import time
from flask import url_for, escape from flask import url_for, escape
from . util import live_server_setup from . util import live_server_setup
import pytest import pytest
jq_support = True
try:
import jq
except ModuleNotFoundError:
jq_support = False
def test_setup(live_server): def test_setup(live_server):
live_server_setup(live_server) live_server_setup(live_server)
@@ -45,24 +40,22 @@ and it can also be repeated
assert text == "23.5" assert text == "23.5"
# also check for jq # also check for jq
if jq_support: text = html_tools.extract_json_as_string(content, "jq:.offers.price")
text = html_tools.extract_json_as_string(content, "jq:.offers.price") assert text == "23.5"
assert text == "23.5"
text = html_tools.extract_json_as_string('{"id":5}', "jq:.id")
assert text == "5"
text = html_tools.extract_json_as_string('{"id":5}', "json:$.id") text = html_tools.extract_json_as_string('{"id":5}', "json:$.id")
assert text == "5" assert text == "5"
text = html_tools.extract_json_as_string('{"id":5}', "jq:.id")
assert text == "5"
# When nothing at all is found, it should throw JSONNOTFound # When nothing at all is found, it should throw JSONNOTFound
# Which is caught and shown to the user in the watch-overview table # Which is caught and shown to the user in the watch-overview table
with pytest.raises(html_tools.JSONNotFound) as e_info: with pytest.raises(html_tools.JSONNotFound) as e_info:
html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "json:$.id") html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "json:$.id")
if jq_support: with pytest.raises(html_tools.JSONNotFound) as e_info:
with pytest.raises(html_tools.JSONNotFound) as e_info: html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "jq:.id")
html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "jq:.id")
def set_original_ext_response(): def set_original_ext_response():
data = """ data = """
@@ -278,8 +271,7 @@ def test_check_jsonpath_filter(client, live_server):
check_json_filter('json:boss.name', client, live_server) check_json_filter('json:boss.name', client, live_server)
def test_check_jq_filter(client, live_server): def test_check_jq_filter(client, live_server):
if jq_support: check_json_filter('jq:.boss.name', client, live_server)
check_json_filter('jq:.boss.name', client, live_server)
def check_json_filter_bool_val(json_filter, client, live_server): def check_json_filter_bool_val(json_filter, client, live_server):
set_original_response() set_original_response()
@@ -337,8 +329,7 @@ def test_check_jsonpath_filter_bool_val(client, live_server):
check_json_filter_bool_val("json:$['available']", client, live_server) check_json_filter_bool_val("json:$['available']", client, live_server)
def test_check_jq_filter_bool_val(client, live_server): def test_check_jq_filter_bool_val(client, live_server):
if jq_support: check_json_filter_bool_val("jq:.available", client, live_server)
check_json_filter_bool_val("jq:.available", client, live_server)
# Re #265 - Extended JSON selector test # Re #265 - Extended JSON selector test
# Stuff to consider here # Stuff to consider here
@@ -417,5 +408,4 @@ def test_check_jsonpath_ext_filter(client, live_server):
check_json_ext_filter('json:$[?(@.status==Sold)]', client, live_server) check_json_ext_filter('json:$[?(@.status==Sold)]', client, live_server)
def test_check_jq_ext_filter(client, live_server): def test_check_jq_ext_filter(client, live_server):
if jq_support: check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server)
check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server)

View File

@@ -19,8 +19,7 @@ chardet > 2.3.0
wtforms ~= 3.0 wtforms ~= 3.0
jsonpath-ng ~= 1.5.3 jsonpath-ng ~= 1.5.3
jq ~= 1.3.0
# jq not available on Windows so must be installed manually
# Notification library # Notification library
apprise ~= 1.1.0 apprise ~= 1.1.0