mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2025-12-12 11:05:42 +00:00
Try to handle two different cases of Offers detection
This commit is contained in:
@@ -7,13 +7,14 @@ from typing import List
|
|||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
|
||||||
# HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
|
# HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
|
||||||
TEXT_FILTER_LIST_LINE_SUFFIX = "<br>"
|
TEXT_FILTER_LIST_LINE_SUFFIX = "<br>"
|
||||||
|
|
||||||
PERL_STYLE_REGEX = r'^/(.*?)/([a-z]*)?$'
|
PERL_STYLE_REGEX = r'^/(.*?)/([a-z]*)?$'
|
||||||
# 'price' , 'lowPrice', 'highPrice' are usually under here
|
# 'price' , 'lowPrice', 'highPrice' are usually under here
|
||||||
# all of those may or may not appear on different websites
|
# All of those may or may not appear on different websites - I didn't find a way to do case-insensitive searching here
|
||||||
LD_JSON_PRODUCT_OFFER_SELECTOR = "json:$..offers"
|
LD_JSON_PRODUCT_OFFER_SELECTORS = ["json:$..offers", "json:$..Offers"]
|
||||||
|
|
||||||
class JSONNotFound(ValueError):
|
class JSONNotFound(ValueError):
|
||||||
def __init__(self, msg):
|
def __init__(self, msg):
|
||||||
@@ -293,14 +294,17 @@ def html_to_text(html_content: str, render_anchor_tag_content=False) -> str:
|
|||||||
|
|
||||||
# Does LD+JSON exist with a @type=='product' and a .price set anywhere?
|
# Does LD+JSON exist with a @type=='product' and a .price set anywhere?
|
||||||
def has_ldjson_product_info(content):
|
def has_ldjson_product_info(content):
|
||||||
|
pricing_data = ''
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if not 'application/ld+json' in content:
|
if not 'application/ld+json' in content:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Always lowercase the content so the json_filter for finding $..offers matches
|
for filter in LD_JSON_PRODUCT_OFFER_SELECTORS:
|
||||||
pricing_data = extract_json_as_string(content=content.lower(),
|
pricing_data += extract_json_as_string(content=content,
|
||||||
json_filter=LD_JSON_PRODUCT_OFFER_SELECTOR,
|
json_filter=filter,
|
||||||
ensure_is_ldjson_info_type="product")
|
ensure_is_ldjson_info_type="product")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Totally fine
|
# Totally fine
|
||||||
return False
|
return False
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
|||||||
|
|
||||||
name = 'Webpage Text/HTML, JSON and PDF changes'
|
name = 'Webpage Text/HTML, JSON and PDF changes'
|
||||||
description = 'Detects all text changes where possible'
|
description = 'Detects all text changes where possible'
|
||||||
|
json_filter_prefixes = ['json:', 'jq:']
|
||||||
|
|
||||||
class FilterNotFoundInResponse(ValueError):
|
class FilterNotFoundInResponse(ValueError):
|
||||||
def __init__(self, msg):
|
def __init__(self, msg):
|
||||||
@@ -196,7 +196,7 @@ class perform_site_check(difference_detection_processor):
|
|||||||
|
|
||||||
# Inject a virtual LD+JSON price tracker rule
|
# Inject a virtual LD+JSON price tracker rule
|
||||||
if watch.get('track_ldjson_price_data', '') == PRICE_DATA_TRACK_ACCEPT:
|
if watch.get('track_ldjson_price_data', '') == PRICE_DATA_TRACK_ACCEPT:
|
||||||
include_filters_rule.append(html_tools.LD_JSON_PRODUCT_OFFER_SELECTOR)
|
include_filters_rule += html_tools.LD_JSON_PRODUCT_OFFER_SELECTORS
|
||||||
|
|
||||||
has_filter_rule = len(include_filters_rule) and len(include_filters_rule[0].strip())
|
has_filter_rule = len(include_filters_rule) and len(include_filters_rule[0].strip())
|
||||||
has_subtractive_selectors = len(subtractive_selectors) and len(subtractive_selectors[0].strip())
|
has_subtractive_selectors = len(subtractive_selectors) and len(subtractive_selectors[0].strip())
|
||||||
@@ -214,7 +214,6 @@ class perform_site_check(difference_detection_processor):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
if has_filter_rule:
|
if has_filter_rule:
|
||||||
json_filter_prefixes = ['json:', 'jq:']
|
|
||||||
for filter in include_filters_rule:
|
for filter in include_filters_rule:
|
||||||
if any(prefix in filter for prefix in json_filter_prefixes):
|
if any(prefix in filter for prefix in json_filter_prefixes):
|
||||||
stripped_text_from_html += html_tools.extract_json_as_string(content=fetcher.content, json_filter=filter)
|
stripped_text_from_html += html_tools.extract_json_as_string(content=fetcher.content, json_filter=filter)
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ def set_response_with_ldjson():
|
|||||||
"description":"You dont need it",
|
"description":"You dont need it",
|
||||||
"mpn":"111111",
|
"mpn":"111111",
|
||||||
"sku":"22222",
|
"sku":"22222",
|
||||||
"oFFerS":{
|
"Offers":{
|
||||||
"@type":"AggregateOffer",
|
"@type":"AggregateOffer",
|
||||||
"lowPrice":8097000,
|
"lowPrice":8097000,
|
||||||
"highPrice":8099900,
|
"highPrice":8099900,
|
||||||
@@ -177,7 +177,7 @@ def test_bad_ldjson_is_correctly_ignored(client, live_server):
|
|||||||
"@type": ["Product", "SubType"],
|
"@type": ["Product", "SubType"],
|
||||||
"name": "My test product",
|
"name": "My test product",
|
||||||
"description": "",
|
"description": "",
|
||||||
"OffeRS": {
|
"offers": {
|
||||||
"note" : "You can see the case-insensitive OffERS key, it should work",
|
"note" : "You can see the case-insensitive OffERS key, it should work",
|
||||||
"@type": "Offer",
|
"@type": "Offer",
|
||||||
"offeredBy": {
|
"offeredBy": {
|
||||||
|
|||||||
Reference in New Issue
Block a user