Compare commits

...

11 Commits

Author SHA1 Message Date
dgtlmoon
13576d081b Fix syntax 2022-10-23 14:01:18 +02:00
dgtlmoon
b6373779d0 Also run on PR 2022-10-23 13:57:09 +02:00
dgtlmoon
4cbcc59461 0.39.20.4 2022-10-17 18:36:47 +02:00
dgtlmoon
4be0260381 Better cross platform file handling in diff and preview (#1034) 2022-10-17 18:36:22 +02:00
dgtlmoon
957a3c1c16 0.39.20.3 2022-10-17 17:43:35 +02:00
dgtlmoon
85897e0bf9 Windows - diff file handling improvements (#1031) 2022-10-17 17:40:28 +02:00
dgtlmoon
63095f70ea Also include tests in pip build 2022-10-17 17:13:15 +02:00
dgtlmoon
8d5b0b5576 Update README.md 2022-10-12 10:51:39 +02:00
dgtlmoon
1b077abd93 0.39.20.2 2022-10-12 09:53:59 +02:00
dgtlmoon
32ea1a8721 Windows - JQ - Make library optional so it doesnt break Windows pip installs (#1009) 2022-10-12 09:53:16 +02:00
dgtlmoon
fff32cef0d Adding test - Test the 'execute JS before changedetection' (#1006) 2022-10-11 14:40:36 +02:00
13 changed files with 112 additions and 80 deletions

View File

@@ -1,8 +1,7 @@
name: ChangeDetection.io Container Build Test
# Triggers the workflow on push or pull request events
on:
push:
on: [push, pull_request]
paths:
- requirements.txt
- Dockerfile

View File

@@ -26,6 +26,11 @@ RUN pip install --target=/dependencies -r /requirements.txt
RUN pip install --target=/dependencies playwright~=1.26 \
|| echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."
RUN pip install --target=/dependencies jq~=1.3 \
|| echo "WARN: Failed to install JQ. The application can still run, but the Jq: filter option will be disabled."
# Final image stage
FROM python:3.8-slim

View File

@@ -2,6 +2,7 @@ recursive-include changedetectionio/api *
recursive-include changedetectionio/templates *
recursive-include changedetectionio/static *
recursive-include changedetectionio/model *
recursive-include changedetectionio/tests *
include changedetection.py
global-exclude *.pyc
global-exclude node_modules

View File

@@ -121,8 +121,8 @@ See the wiki for more information https://github.com/dgtlmoon/changedetection.io
## Filters
XPath, JSONPath, jq, and CSS support comes baked in! You can be as specific as you need, use XPath exported from various XPath element query creation tools.
XPath, JSONPath, jq, and CSS support comes baked in! You can be as specific as you need, use XPath exported from various XPath element query creation tools.
(We support LXML `re:test`, `re:math` and `re:replace`.)
## Notifications
@@ -161,46 +161,14 @@ This will re-parse the JSON and apply formatting to the text, making it super ea
### JSONPath or jq?
For more complex parsing, filtering, and modifying of JSON data, jq is recommended due to the built-in operators and functions. Refer to the [documentation](https://stedolan.github.io/jq/manual/) for more information on jq.
For more complex parsing, filtering, and modifying of JSON data, jq is recommended due to the built-in operators and functions. Refer to the [documentation](https://stedolan.github.io/jq/manual/) for more specifc information on jq.
The example below adds the price in dollars to each item in the JSON data, and then filters to only show items that are greater than 10.
One big advantage of `jq` is that you can use logic in your JSON filter, such as filters to only show items that have a value greater than/less than etc.
#### Sample input data from API
```
{
"items": [
{
"name": "Product A",
"priceInCents": 2500
},
{
"name": "Product B",
"priceInCents": 500
},
{
"name": "Product C",
"priceInCents": 2000
}
]
}
```
See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/JSON-Selector-Filter-help for more information and examples
#### Sample jq
`jq:.items[] | . + { "priceInDollars": (.priceInCents / 100) } | select(.priceInDollars > 10)`
Note: `jq` library must be added separately (`pip3 install jq`)
#### Sample output data
```
{
"name": "Product A",
"priceInCents": 2500,
"priceInDollars": 25
}
{
"name": "Product C",
"priceInCents": 2000,
"priceInDollars": 20
}
```
### Parse JSON embedded in HTML!

View File

@@ -33,7 +33,7 @@ from flask_wtf import CSRFProtect
from changedetectionio import html_tools
from changedetectionio.api import api_v1
__version__ = '0.39.20.1'
__version__ = '0.39.20.4'
datastore = None
@@ -636,20 +636,27 @@ def changedetection_app(config=None, datastore_o=None):
# Only works reliably with Playwright
visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and default['fetch_backend'] == 'html_webdriver'
# JQ is difficult to install on windows and must be manually added (outside requirements.txt)
jq_support = True
try:
import jq
except ModuleNotFoundError:
jq_support = False
output = render_template("edit.html",
uuid=uuid,
watch=datastore.data['watching'][uuid],
form=form,
has_empty_checktime=using_default_check_time,
has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
using_global_webdriver_wait=default['webdriver_delay'] is None,
current_base_url=datastore.data['settings']['application']['base_url'],
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
form=form,
has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
has_empty_checktime=using_default_check_time,
jq_support=jq_support,
playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False),
settings_application=datastore.data['settings']['application'],
using_global_webdriver_wait=default['webdriver_delay'] is None,
uuid=uuid,
visualselector_data_is_ready=visualselector_data_is_ready,
visualselector_enabled=visualselector_enabled,
playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False)
watch=datastore.data['watching'][uuid],
)
return output
@@ -809,8 +816,10 @@ def changedetection_app(config=None, datastore_o=None):
newest_file = history[dates[-1]]
# Read as binary and force decode as UTF-8
# Windows may fail decode in python if we just use 'r' mode (chardet decode exception)
try:
with open(newest_file, 'r') as f:
with open(newest_file, 'r', encoding='utf-8', errors='ignore') as f:
newest_version_file_contents = f.read()
except Exception as e:
newest_version_file_contents = "Unable to read {}.\n".format(newest_file)
@@ -823,7 +832,7 @@ def changedetection_app(config=None, datastore_o=None):
previous_file = history[dates[-2]]
try:
with open(previous_file, 'r') as f:
with open(previous_file, 'r', encoding='utf-8', errors='ignore') as f:
previous_version_file_contents = f.read()
except Exception as e:
previous_version_file_contents = "Unable to read {}.\n".format(previous_file)
@@ -900,7 +909,7 @@ def changedetection_app(config=None, datastore_o=None):
timestamp = list(watch.history.keys())[-1]
filename = watch.history[timestamp]
try:
with open(filename, 'r') as f:
with open(filename, 'r', encoding='utf-8', errors='ignore') as f:
tmp = f.readlines()
# Get what needs to be highlighted

View File

@@ -303,12 +303,16 @@ class ValidateCSSJSONXPATHInput(object):
# Re #265 - maybe in the future fetch the page and offer a
# warning/notice that its possible the rule doesnt yet match anything?
if 'jq:' in line:
if not self.allow_json:
raise ValidationError("jq not permitted in this field!")
import jq
if 'jq:' in line:
try:
import jq
except ModuleNotFoundError:
# `jq` requires full compilation in windows and so isn't generally available
raise ValidationError("jq not support not found")
input = line.replace('jq:', '')
try:

View File

@@ -1,12 +1,11 @@
import json
from typing import List
from bs4 import BeautifulSoup
from jsonpath_ng.ext import parse
import jq
import re
from inscriptis import get_text
from inscriptis.model.config import ParserConfig
from jsonpath_ng.ext import parse
from typing import List
import json
import re
class FilterNotFoundInResponse(ValueError):
def __init__(self, msg):
@@ -85,9 +84,18 @@ def _parse_json(json_data, json_filter):
jsonpath_expression = parse(json_filter.replace('json:', ''))
match = jsonpath_expression.find(json_data)
return _get_stripped_text_from_json_match(match)
if 'jq:' in json_filter:
try:
import jq
except ModuleNotFoundError:
# `jq` requires full compilation in windows and so isn't generally available
raise Exception("jq not support not found")
jq_expression = jq.compile(json_filter.replace('jq:', ''))
match = jq_expression.input(json_data).all()
return _get_stripped_text_from_json_match(match)
def _get_stripped_text_from_json_match(match):

View File

@@ -151,28 +151,30 @@ class model(dict):
import uuid
import logging
output_path = "{}/{}".format(self.__datastore_path, self['uuid'])
output_path = os.path.join(self.__datastore_path, self['uuid'])
self.ensure_data_dir_exists()
snapshot_fname = os.path.join(output_path, str(uuid.uuid4()))
snapshot_fname = "{}/{}.stripped.txt".format(output_path, uuid.uuid4())
logging.debug("Saving history text {}".format(snapshot_fname))
# in /diff/ and /preview/ we are going to assume for now that it's UTF-8 when reading
# most sites are utf-8 and some are even broken utf-8
with open(snapshot_fname, 'wb') as f:
f.write(contents)
f.close()
# Append to index
# @todo check last char was \n
index_fname = "{}/history.txt".format(output_path)
index_fname = os.path.join(output_path, "history.txt")
with open(index_fname, 'a') as f:
f.write("{},{}\n".format(timestamp, snapshot_fname))
f.close()
self.__newest_history_key = timestamp
self.__history_n+=1
self.__history_n += 1
#@todo bump static cache of the last timestamp so we dont need to examine the file to set a proper ''viewed'' status
# @todo bump static cache of the last timestamp so we dont need to examine the file to set a proper ''viewed'' status
return snapshot_fname
@property

View File

@@ -23,6 +23,13 @@ export BASE_URL="https://really-unique-domain.io"
pytest tests/test_notification.py
## JQ + JSON: filter test
# jq is not available on windows and we should just test it when the package is installed
# this will re-test with jq support
pip3 install jq~=1.3
pytest tests/test_jsonpath_jq_selector.py
# Now for the selenium and playwright/browserless fetchers
# Note - this is not UI functional tests - just checking that each one can fetch the content

View File

@@ -184,10 +184,14 @@ User-Agent: wonderbra 1.0") }}
<span class="pure-form-message-inline">
<ul>
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
<li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a>.
<li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
<ul>
<li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required, <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li>
{% if jq_support %}
<li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>.</li>
{% else %}
<li>jq support not installed</li>
{% endif %}
</ul>
</li>
<li>XPath - Limit text to this XPath rule, simply start with a forward-slash,
@@ -198,7 +202,7 @@ User-Agent: wonderbra 1.0") }}
</ul>
</li>
</ul>
Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath, or jq selector rules before filing an issue on GitHub! <a
Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br/>
</span>
</div>

View File

@@ -5,7 +5,12 @@ import time
from flask import url_for, escape
from . util import live_server_setup
import pytest
jq_support = True
try:
import jq
except ModuleNotFoundError:
jq_support = False
def test_setup(live_server):
live_server_setup(live_server)
@@ -40,22 +45,24 @@ and it can also be repeated
assert text == "23.5"
# also check for jq
text = html_tools.extract_json_as_string(content, "jq:.offers.price")
assert text == "23.5"
if jq_support:
text = html_tools.extract_json_as_string(content, "jq:.offers.price")
assert text == "23.5"
text = html_tools.extract_json_as_string('{"id":5}', "jq:.id")
assert text == "5"
text = html_tools.extract_json_as_string('{"id":5}', "json:$.id")
assert text == "5"
text = html_tools.extract_json_as_string('{"id":5}', "jq:.id")
assert text == "5"
# When nothing at all is found, it should throw JSONNOTFound
# Which is caught and shown to the user in the watch-overview table
with pytest.raises(html_tools.JSONNotFound) as e_info:
html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "json:$.id")
with pytest.raises(html_tools.JSONNotFound) as e_info:
html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "jq:.id")
if jq_support:
with pytest.raises(html_tools.JSONNotFound) as e_info:
html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "jq:.id")
def set_original_ext_response():
data = """
@@ -271,7 +278,8 @@ def test_check_jsonpath_filter(client, live_server):
check_json_filter('json:boss.name', client, live_server)
def test_check_jq_filter(client, live_server):
check_json_filter('jq:.boss.name', client, live_server)
if jq_support:
check_json_filter('jq:.boss.name', client, live_server)
def check_json_filter_bool_val(json_filter, client, live_server):
set_original_response()
@@ -329,7 +337,8 @@ def test_check_jsonpath_filter_bool_val(client, live_server):
check_json_filter_bool_val("json:$['available']", client, live_server)
def test_check_jq_filter_bool_val(client, live_server):
check_json_filter_bool_val("jq:.available", client, live_server)
if jq_support:
check_json_filter_bool_val("jq:.available", client, live_server)
# Re #265 - Extended JSON selector test
# Stuff to consider here
@@ -408,4 +417,5 @@ def test_check_jsonpath_ext_filter(client, live_server):
check_json_ext_filter('json:$[?(@.status==Sold)]', client, live_server)
def test_check_jq_ext_filter(client, live_server):
check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server)
if jq_support:
check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server)

View File

@@ -13,9 +13,9 @@ def test_visual_selector_content_ready(client, live_server):
live_server_setup(live_server)
time.sleep(1)
# Add our URL to the import page, maybe better to use something we control?
# We use an external URL because the docker container is too difficult to setup to connect back to the pytest socket
test_url = 'https://news.ycombinator.com'
# Add our URL to the import page, because the docker container (playwright/selenium) wont be able to connect to our usual test url
test_url = "https://changedetection.io/ci-test/test-runjs.html"
res = client.post(
url_for("form_quick_watch_add"),
data={"url": test_url, "tag": '', 'edit_and_watch_submit_button': 'Edit > Watch'},
@@ -25,13 +25,27 @@ def test_visual_selector_content_ready(client, live_server):
res = client.post(
url_for("edit_page", uuid="first", unpause_on_save=1),
data={"css_filter": ".does-not-exist", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_webdriver"},
data={
"url": test_url,
"tag": "",
"headers": "",
'fetch_backend': "html_webdriver",
'webdriver_js_execute_code': 'document.querySelector("button[name=test-button]").click();'
},
follow_redirects=True
)
assert b"unpaused" in res.data
time.sleep(1)
wait_for_all_checks(client)
uuid = extract_UUID_from_client(client)
# Check the JS execute code before extract worked
res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)
assert b'I smell JavaScript' in res.data
assert os.path.isfile(os.path.join('test-datastore', uuid, 'last-screenshot.png')), "last-screenshot.png should exist"
assert os.path.isfile(os.path.join('test-datastore', uuid, 'elements.json')), "xpath elements.json data should exist"

View File

@@ -19,7 +19,8 @@ chardet > 2.3.0
wtforms ~= 3.0
jsonpath-ng ~= 1.5.3
jq ~= 1.3.0
# jq not available on Windows so must be installed manually
# Notification library
apprise ~= 1.1.0