Re #117 JSONPath-based JSON change detection filter (#125)

* Re #117 - Experimental JSON selector support: prefix the filter with 'json:' followed by any JSONPath rule
This commit is contained in:
dgtlmoon
2021-07-11 22:07:39 +10:00
committed by GitHub
parent f2643c1b65
commit e073521f4d
5 changed files with 171 additions and 10 deletions

View File

@@ -88,12 +88,27 @@ class perform_site_check():
html = r.text
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
is_html = True
css_filter_rule = self.datastore.data['watching'][uuid]['css_filter']
if css_filter_rule and len(css_filter_rule.strip()):
html = html_tools.css_filter(css_filter=css_filter_rule, html_content=r.content)
if 'json:' in css_filter_rule:
# POC hack, @todo rename vars, see how it fits in with the javascript version
import json
from jsonpath_ng import jsonpath, parse
stripped_text_from_html = get_text(html)
json_data = json.loads(html)
jsonpath_expression = parse(css_filter_rule.replace('json:',''))
match = jsonpath_expression.find(json_data)
stripped_text_from_html = json.dumps(match[0].value, indent=4)
is_html = False
else:
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
html = html_tools.css_filter(css_filter=css_filter_rule, html_content=r.content)
if is_html:
stripped_text_from_html = get_text(html)
# Usually from networkIO/requests level
except (requests.exceptions.ConnectionError, requests.exceptions.ReadTimeout) as e: