Compare commits

..

17 Commits

Author SHA1 Message Date
dgtlmoon
c55bf418c5 0.39.3 release 2021-10-28 11:32:33 +02:00
dgtlmoon
4bbb7d99b6 Re #264 - fixing clone watch operation 2021-10-28 11:29:59 +02:00
dgtlmoon
a8e92e2226 Re #265 - extended jsonpath support (#266)
* Re #265 - Use extended JSONpath support,
Allow a JSONPath selector to not match anything (yet)
Adding test
Correctly capture invalid JSONPath query error
2021-10-27 09:24:08 +02:00
dgtlmoon
c17327633f Merge branch 'master' of github.com:dgtlmoon/changedetection.io 2021-10-26 22:32:29 +02:00
dgtlmoon
56d1dde7c3 Re #265 - wasn't properly catching the jsonpath exception raised by invalid jsonpath expressions 2021-10-26 22:30:58 +02:00
dgtlmoon
6e4ddacaf8 Re #257 - Handle bool val of json path better (#263)
* Re #257 - Handle bool val of json path better, with test
2021-10-21 23:25:38 +02:00
dgtlmoon
3195ffa1c6 Re #249 - Add EXPOSE 5000 to Dockerfile 2021-10-06 22:28:35 +02:00
dgtlmoon
c749d2ee44 Merge branch 'master' of github.com:dgtlmoon/changedetection.io 2021-10-06 20:51:38 +02:00
dgtlmoon
ec94359f3c Provide better combination of chardet and urllib3 2021-10-06 20:51:05 +02:00
dgtlmoon
4d0bd58eb1 Prefer GHCR.io over DockerHub (#245)
* Prefer GHCR.io over DockerHub (DockerHub limits pulls)
2021-10-06 13:07:56 +02:00
dgtlmoon
3525f43469 Limit branches/tags of container build
Limit branch
2021-10-06 12:27:02 +02:00
dgtlmoon
d70252c1eb Re #213 - Adding screensize examples to selenium container 2021-10-06 11:34:24 +02:00
dgtlmoon
b57b94c63a Be more specific about tagged release builds 2021-10-06 11:28:39 +02:00
dgtlmoon
9e914c140e Fix :latest release workflow syntax check 2021-10-06 10:27:03 +02:00
dgtlmoon
5d5ceb2f52 Form helper - explain where the webdriver setting comes from 2021-10-06 09:27:41 +02:00
dgtlmoon
bc0303c5da Rename workflow name 2021-10-06 08:59:03 +02:00
dgtlmoon
1240da4a6e Just 'published' and 'edited' package release is enough (remove 'created') 2021-10-06 08:52:10 +02:00
12 changed files with 217 additions and 53 deletions

View File

@@ -1,14 +1,16 @@
name: Build and push containers
on:
# Automatically triggered by a testing workflow passing
# Automatically triggered by a testing workflow passing, but this is only checked when it lands in the `master`/default branch
workflow_run:
workflows: ["ChangeDetection.io Test"]
branches: [master]
tags: ['0.*']
types: [completed]
# Or a new tagged release
release:
types: [created, published, edited]
types: [published, edited]
jobs:
metadata:
@@ -23,10 +25,11 @@ jobs:
echo Ref ${{ github.ref }}
echo c: ${{ github.event.workflow_run.conclusion }}
echo r: ${{ github.event.workflow_run }}
echo tname: ${{ github.event.release.tag_name }}
echo tname: "${{ github.event.release.tag_name }}"
echo headbranch: -${{ github.event.workflow_run.head_branch }}-
set
on-success:
build-push-containers:
runs-on: ubuntu-latest
# If the testing workflow has a success, then we build to :latest
# Or if we are in a tagged release scenario.
@@ -81,7 +84,7 @@ jobs:
# master always builds :latest
- name: Build and push :latest
id: docker_build
if: ${{ github.ref == 'refs/heads/master'}}
if: ${{ github.ref }} == "refs/heads/master"
uses: docker/build-push-action@v2
with:
context: ./
@@ -97,7 +100,7 @@ jobs:
# A new tagged release is required, which builds :tag
- name: Build and push :tag
id: docker_build_tag_release
if: ${{ github.event.release.tag_name }} != ''
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
uses: docker/build-push-action@v2
with:
context: ./
@@ -110,7 +113,6 @@ jobs:
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache
- name: Image digest
run: echo step SHA ${{ steps.vars.outputs.sha_short }} tag ${{steps.vars.outputs.tag}} branch ${{steps.vars.outputs.branch}} digest ${{ steps.docker_build.outputs.digest }}

View File

@@ -46,6 +46,8 @@ RUN [ ! -d "/datastore" ] && mkdir /datastore
COPY --from=builder /dependencies /usr/local
ENV PYTHONPATH=/usr/local
EXPOSE 5000
# The actual flask app
COPY changedetectionio /app/changedetectionio
# The eventlet server wrapper

View File

@@ -41,15 +41,15 @@ Run the python code on your own machine by cloning this repository, or with <a h
### Docker
Check out our Docker hub page https://hub.docker.com/r/dgtlmoon/changedetection.io
_Note:_ We also use GitHub's container repository, because DockerHub has limited pull/downloads.
With Docker composer, just clone this repository and
With Docker Compose, just clone this repository and run:
```bash
$ docker-compose up -d
```
Docker standalone
```bash
$ docker run -d --restart always -p "127.0.0.1:5000:5000" -v datastore-volume:/datastore --name changedetection.io dgtlmoon/changedetection.io
$ docker run -d --restart always -p "127.0.0.1:5000:5000" -v datastore-volume:/datastore --name changedetection.io ghcr.io/dgtlmoon/changedetection.io
```
### Python Pip
@@ -69,10 +69,10 @@ _Now with per-site configurable support for using a fast built in HTTP fetcher o
### Docker
```
docker pull dgtlmoon/changedetection.io
docker pull ghcr.io/dgtlmoon/changedetection.io
docker kill $(docker ps -a|grep changedetection.io|awk '{print $1}')
docker rm $(docker ps -a|grep changedetection.io|awk '{print $1}')
docker run -d --restart always -p "127.0.0.1:5000:5000" -v datastore-volume:/datastore --name changedetection.io dgtlmoon/changedetection.io
docker run -d --restart always -p "127.0.0.1:5000:5000" -v datastore-volume:/datastore --name changedetection.io ghcr.io/dgtlmoon/changedetection.io
```
### docker-compose

View File

@@ -30,7 +30,7 @@ import datetime
import pytz
from copy import deepcopy
__version__ = '0.39.2'
__version__ = '0.39.3'
datastore = None
@@ -778,9 +778,9 @@ def changedetection_app(config=None, datastore_o=None):
@app.route("/api/clone", methods=['GET'])
@login_required
def api_clone():
uuid = request.args.get('uuid')
datastore.clone(uuid)
new_uuid = datastore.clone(uuid)
update_q.put(new_uuid)
flash('Cloned.')
return redirect(url_for('index'))
@@ -902,7 +902,6 @@ def ticker_thread_check_time_launch_checks():
# Check for watches outside of the time threshold to put in the thread queue.
for uuid, watch in copied_datastore.data['watching'].items():
# If they supplied an individual entry minutes to threshold.
if 'minutes_between_check' in watch and watch['minutes_between_check'] is not None:
# Cast to int just in case

View File

@@ -178,17 +178,19 @@ class ValidateCSSJSONInput(object):
def __call__(self, form, field):
if 'json:' in field.data:
from jsonpath_ng.exceptions import JsonPathParserError
from jsonpath_ng import jsonpath, parse
from jsonpath_ng.exceptions import JsonPathParserError, JsonPathLexerError
from jsonpath_ng.ext import parse
input = field.data.replace('json:', '')
try:
parse(input)
except JsonPathParserError as e:
except (JsonPathParserError, JsonPathLexerError) as e:
message = field.gettext('\'%s\' is not a valid JSONPath expression. (%s)')
raise ValidationError(message % (input, str(e)))
# Re #265 - maybe in the future fetch the page and offer a
# warning/notice that it's possible the rule doesn't yet match anything?
class quickWatchForm(Form):
# https://wtforms.readthedocs.io/en/2.3.x/fields/#module-wtforms.fields.html5

View File

@@ -1,6 +1,6 @@
import json
from bs4 import BeautifulSoup
from jsonpath_ng import parse
from jsonpath_ng.ext import parse
class JSONNotFound(ValueError):
@@ -45,8 +45,10 @@ def _parse_json(json_data, jsonpath_filter):
if len(match) == 1:
s = match[0].value
if not s:
raise JSONNotFound("No Matching JSON could be found for the rule {}".format(jsonpath_filter.replace('json:', '')))
# Re #257 - Better handling where it does not exist, in the case the original 's' value was False..
if not match:
# Re 265 - Just return an empty string when filter not found
return ''
stripped_text_from_html = json.dumps(s, indent=4)
@@ -85,6 +87,7 @@ def extract_json_as_string(content, jsonpath_filter):
break
if not stripped_text_from_html:
raise JSONNotFound("No JSON matching the rule '%s' found" % jsonpath_filter.replace('json:',''))
# Re 265 - Just return an empty string when filter not found
return ''
return stripped_text_from_html

View File

@@ -251,24 +251,10 @@ class ChangeDetectionStore:
# Clone a watch by UUID
def clone(self, uuid):
with self.lock:
new_uuid = str(uuid_builder.uuid4())
_clone = deepcopy(self.data['watching'][uuid])
_clone.update({'uuid': new_uuid})
attributes_to_reset = [
'last_checked',
'last_changed',
'last_viewed',
'newest_history_key',
'previous_md5',
'history'
]
for attribute in attributes_to_reset:
_clone.update({attribute: self.generic_definition[attribute]})
self.data['watching'][new_uuid] = _clone
self.needs_write = True
url = self.data['watching'][uuid]['url']
tag = self.data['watching'][uuid]['tag']
new_uuid = self.add_watch(url=url, tag=tag)
return new_uuid
def url_exists(self, url):

View File

@@ -53,7 +53,7 @@ User-Agent: wonderbra 1.0") }}
{{ render_field(form.fetch_backend) }}
<span class="pure-form-message-inline">
<p>Use the <strong>Basic</strong> method (default) where your watched sites don't need Javascript to render.</p>
<p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server. </p>
<p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p>
</span>
</div>
<div class="pure-control-group">

View File

@@ -61,7 +61,7 @@
{{ render_field(form.fetch_backend) }}
<span class="pure-form-message-inline">
<p>Use the <strong>Basic</strong> method (default) where your watched sites don't need Javascript to render.</p>
<p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server. </p>
<p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p>
</span>
</div>
</div>

View File

@@ -5,6 +5,10 @@ from flask import url_for
from . util import live_server_setup
import pytest
def test_setup(live_server):
live_server_setup(live_server)
def test_unittest_inline_html_extract():
# So let's pretend that the JSON we want is inside some HTML
content="""
@@ -42,6 +46,45 @@ and it can also be repeated
with pytest.raises(html_tools.JSONNotFound) as e_info:
html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "$.id")
def set_original_ext_response():
data = """
[
{
"isPriceLowered": false,
"status": "ForSale",
"statusOrig": "for sale"
},
{
"_id": "5e7b3e1fb3262d306323ff1e",
"listingsType": "consumer",
"status": "ForSale",
"statusOrig": "for sale"
}
]
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(data)
def set_modified_ext_response():
data = """
[
{
"isPriceLowered": false,
"status": "Sold",
"statusOrig": "sold"
},
{
"_id": "5e7b3e1fb3262d306323ff1e",
"listingsType": "consumer",
"isPriceLowered": false,
"status": "Sold"
}
]
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(data)
def set_original_response():
test_return_data = """
@@ -60,7 +103,8 @@ def set_original_response():
],
"boss": {
"name": "Fat guy"
}
},
"available": true
}
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
@@ -84,7 +128,8 @@ def set_modified_response():
],
"boss": {
"name": "Foobar"
}
},
"available": false
}
"""
@@ -93,11 +138,7 @@ def set_modified_response():
return None
def test_check_json_filter(client, live_server):
live_server_setup(live_server)
json_filter = 'json:boss.name'
set_original_response()
@@ -161,3 +202,126 @@ def test_check_json_filter(client, live_server):
res = client.get(url_for("diff_history_page", uuid="first"))
# But the change should be there, though it's hard to test that the change was detected because it will show old and new versions
assert b'Foobar' in res.data
def test_check_json_filter_bool_val(client, live_server):
json_filter = "json:$['available']"
set_original_response()
# Give the endpoint time to spin up
time.sleep(1)
test_url = url_for('test_endpoint', _external=True)
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
# Go to the edit page and set the JSON filter
# (the URL itself was already added via the import page above)
res = client.post(
url_for("edit_page", uuid="first"),
data={"css_filter": json_filter,
"url": test_url,
"tag": "",
"headers": "",
"fetch_backend": "html_requests"
},
follow_redirects=True
)
assert b"Updated watch." in res.data
# Trigger a check
client.get(url_for("api_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(3)
# Make a change
set_modified_response()
# Trigger a check
client.get(url_for("api_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(3)
res = client.get(url_for("diff_history_page", uuid="first"))
# But the change should be there, though it's hard to test that the change was detected because it will show old and new versions
assert b'false' in res.data
# Re #265 - Extended JSON selector test
# Stuff to consider here
# - Selector should be allowed to return empty when it doesn't match (people might wait for some condition)
# - The 'diff' tab could show the old and new content
# - Form should let us enter a selector that doesn't (yet) match anything
def test_check_json_ext_filter(client, live_server):
json_filter = 'json:$[?(@.status==Sold)]'
set_original_ext_response()
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
# Trigger a check
client.get(url_for("api_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(3)
# Go to the edit page and set the JSON filter
# (the URL itself was already added via the import page above)
res = client.post(
url_for("edit_page", uuid="first"),
data={"css_filter": json_filter,
"url": test_url,
"tag": "",
"headers": "",
"fetch_backend": "html_requests"
},
follow_redirects=True
)
assert b"Updated watch." in res.data
# Check it saved
res = client.get(
url_for("edit_page", uuid="first"),
)
assert bytes(json_filter.encode('utf-8')) in res.data
# Trigger a check
client.get(url_for("api_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(3)
# Make a change
set_modified_ext_response()
# Trigger a check
client.get(url_for("api_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(4)
# It should have 'unviewed'
res = client.get(url_for("index"))
assert b'unviewed' in res.data
res = client.get(url_for("diff_history_page", uuid="first"))
# We should never see 'ForSale' because we are selecting on 'Sold' in the rule,
# But we should know it triggered ('unviewed' assert above)
assert b'ForSale' not in res.data
assert b'Sold' in res.data

View File

@@ -1,7 +1,7 @@
version: '2'
services:
changedetection.io:
image: dgtlmoon/changedetection.io
image: ghcr.io/dgtlmoon/changedetection.io
container_name: changedetection.io
hostname: changedetection.io
volumes:
@@ -40,6 +40,9 @@ services:
# image: selenium/standalone-chrome-debug:3.141.59
# environment:
# - VNC_NO_PASSWORD=1
# - SCREEN_WIDTH=1920
# - SCREEN_HEIGHT=1080
# - SCREEN_DEPTH=24
# volumes:
# # Workaround to avoid the browser crashing inside a docker container
# # See https://github.com/SeleniumHQ/docker-selenium#quick-start

View File

@@ -1,15 +1,18 @@
chardet==2.3.0
flask~= 1.0
eventlet>=0.31.0
requests[socks] ~= 2.15
validators
timeago ~=1.0
inscriptis ~= 1.2
feedgen ~= 0.9
flask-login ~= 0.5
pytz
urllib3
# Set these versions together to avoid a RequestsDependencyWarning
requests[socks] ~= 2.26
urllib3 > 1.26
chardet > 2.3.0
wtforms ~= 2.3.3
jsonpath-ng ~= 1.5.3