diff --git a/changedetectionio/blueprint/tags/templates/edit-tag.html b/changedetectionio/blueprint/tags/templates/edit-tag.html index 08c42a2c..35b3b1d9 100644 --- a/changedetectionio/blueprint/tags/templates/edit-tag.html +++ b/changedetectionio/blueprint/tags/templates/edit-tag.html @@ -13,6 +13,7 @@ /*const email_notification_prefix=JSON.parse('{{ emailprefix|tojson }}');*/ /*{% endif %}*/ +{% set has_tag_filters_extra='' %} @@ -46,59 +47,12 @@
-
- {% set field = render_field(form.include_filters, - rows=5, - placeholder="#example -xpath://body/div/span[contains(@class, 'example-class')]", - class="m-d") - %} - {{ field }} - {% if '/text()' in field %} - Note!: //text() function does not work where the <element> contains <![CDATA[]]>
- {% endif %} - One CSS, xPath, JSON Path/JQ selector per line, any rules that matches will be used.
-
Show advanced help and tips
-
    -
  • CSS - Limit text to this CSS rule, only text matching this CSS rule is included.
  • -
  • JSON - Limit text to this JSON rule, using either JSONPath or jq (if installed). -
      -
    • JSONPath: Prefix with json:, use json:$ to force re-formatting if required, test your JSONPath here.
    • - {% if jq_support %} -
    • jq: Prefix with jq: and test your jq here. Using jq allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation here. Prefix jqraw: outputs the results as text instead of a JSON list.
    • - {% else %} -
    • jq support not installed
    • - {% endif %} -
    -
  • -
  • XPath - Limit text to this XPath rule, simply start with a forward-slash. To specify XPath to be used explicitly or the XPath rule starts with an XPath function: Prefix with xpath: -
      -
    • Example: //*[contains(@class, 'sametext')] or xpath:count(//*[contains(@class, 'sametext')]), test your XPath here
    • -
    • Example: Get all titles from an RSS feed //title/text()
    • -
    • To use XPath1.0: Prefix with xpath1:
    • -
    -
  • -
- Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! here for more CSS selector help.
-
-
-
- {{ render_field(form.subtractive_selectors, rows=5, placeholder="header -footer -nav -.stockticker -//*[contains(text(), 'Advertisement')]") }} - -
    -
  • Remove HTML element(s) by CSS and XPath selectors before text conversion.
  • -
  • Don't paste HTML here, use only CSS and XPath selectors
  • -
  • Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML.
  • -
-
-
- +

These settings are added to any existing watch configurations.

+ {% include "edit/include_subtract.html" %} +
+

Text filtering

+ {% include "edit/text-options.html" %} +
{# rendered sub Template #} diff --git a/changedetectionio/processors/text_json_diff/processor.py b/changedetectionio/processors/text_json_diff/processor.py index 5513fd04..760aabae 100644 --- a/changedetectionio/processors/text_json_diff/processor.py +++ b/changedetectionio/processors/text_json_diff/processor.py @@ -252,6 +252,7 @@ class perform_site_check(difference_detection_processor): # 615 Extract text by regex extract_text = watch.get('extract_text', []) + extract_text += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='extract_text') if len(extract_text) > 0: regex_matched_output = [] for s_re in extract_text: @@ -296,6 +297,8 @@ class perform_site_check(difference_detection_processor): ### CALCULATE MD5 # If there's text to ignore text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', []) + text_to_ignore += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='ignore_text') + text_for_checksuming = stripped_text_from_html if text_to_ignore: text_for_checksuming = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore) @@ -308,8 +311,8 @@ class perform_site_check(difference_detection_processor): ############ Blocking rules, after checksum ################# blocked = False - trigger_text = watch.get('trigger_text', []) + trigger_text += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='trigger_text') if len(trigger_text): # Assume blocked blocked = True @@ -324,6 +327,7 @@ class perform_site_check(difference_detection_processor): blocked = False text_should_not_be_present = watch.get('text_should_not_be_present', []) + text_should_not_be_present += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='text_should_not_be_present') if len(text_should_not_be_present): # If anything matched, then we should block a change from happening result = html_tools.strip_ignore_text(content=str(stripped_text_from_html), diff --git a/changedetectionio/templates/edit.html b/changedetectionio/templates/edit.html index 2914d73a..47f4d8af 100644 --- a/changedetectionio/templates/edit.html +++ b/changedetectionio/templates/edit.html @@ -314,61 +314,8 @@ Math: {{ 1 + 1 }}") }} -
- {% set field = render_field(form.include_filters, - rows=5, - placeholder=has_tag_filters_extra+"#example -xpath://body/div/span[contains(@class, 'example-class')]", - class="m-d") - %} - {{ field }} - {% if '/text()' in field %} - Note!: //text() function does not work where the <element> contains <![CDATA[]]>
- {% endif %} - One CSS, xPath 1 & 2, JSON Path/JQ selector per line, any rules that matches will be used.
- Show advanced help and tips
- -
-
-
- {{ render_field(form.subtractive_selectors, rows=5, placeholder=has_tag_filters_extra+"header -footer -nav -.stockticker -//*[contains(text(), 'Advertisement')]") }} - - - -
+{% include "edit/include_subtract.html" %}

Text filtering

@@ -396,76 +343,9 @@ nav {{ render_checkbox_field(form.trim_text_whitespace) }} Remove any whitespace before and after each line of text
-
-
- {{ render_field(form.trigger_text, rows=5, placeholder="Some text to wait for in a line -/some.regex\d{2}/ for case-INsensitive regex -") }} - -
    -
  • Text to wait for before triggering a change/notification, all text and regex are tested case-insensitive.
  • -
  • Trigger text is processed from the result-text that comes out of any CSS/JSON Filters for this watch
  • -
  • Each line is processed separately (think of each line as "OR")
  • -
  • Note: Wrap in forward slash / to use regex example: /foo\d/
  • -
-
-
-
-
- {{ render_field(form.ignore_text, rows=5, placeholder="Some text to ignore in a line -/some.regex\d{2}/ for case-INsensitive regex -") }} - -
    -
  • Matching text will be ignored in the text snapshot (you can still see it but it wont trigger a change)
  • -
  • Each line processed separately, any line matching will be ignored (removed before creating the checksum)
  • -
  • Regular Expression support, wrap the entire line in forward slash /regex/
  • -
  • Changing this will affect the comparison checksum which may trigger an alert
  • -
-
- -
- -
-
- {{ render_field(form.text_should_not_be_present, rows=5, placeholder="For example: Out of stock -Sold out -Not in stock -Unavailable") }} - -
    -
  • Block change-detection while this text is on the page, all text and regex are tested case-insensitive, good for waiting for when a product is available again
  • -
  • Block text is processed from the result-text that comes out of any CSS/JSON Filters for this watch
  • -
  • All lines here must not exist (think of each line as "OR")
  • -
  • Note: Wrap in forward slash / to use regex example: /foo\d/
  • -
-
-
-
-
-
- {{ render_field(form.extract_text, rows=5, placeholder="/.+?\d+ comments.+?/ - or -keyword") }} - -
    -
  • Extracts text in the final output (line by line) after other filters using regular expressions or string match; -
      -
    • Regular expression ‐ example /reports.+?2022/i
    • -
    • Don't forget to consider the white-space at the start of a line /.+?reports.+?2022/i
    • -
    • Use //(?aiLmsux)) type flags (more information here)
    • -
    • Keyword example ‐ example Out of stock
    • -
    • Use groups to extract just that text ‐ example /reports.+?(\d+)/i returns a list of years only
    • -
    • Example - match lines containing a keyword /.*icecream.*/
    • -
    -
  • -
  • One line per regular-expression/string match
  • -
-
-
-
+ {% include "edit/text-options.html" %}
- +