Compare commits

...

5 Commits

Author SHA1 Message Date
dgtlmoon
f8ce1de357 adding more rules 2024-01-11 17:49:26 +01:00
dgtlmoon
0523b3daee Adding comment 2024-01-11 16:00:49 +01:00
dgtlmoon
7108c4e6db fix dupe var set 2024-01-11 15:59:55 +01:00
dgtlmoon
3c073469f4 Merge branch 'master' into 2039-restock-check-all-bug 2024-01-11 15:59:07 +01:00
dgtlmoon
2ca1b0582f Restock - Check all elements for text to get stock status from, strip any text returned
Re #2039
2023-12-08 16:41:53 +01:00
2 changed files with 123 additions and 109 deletions

View File

@@ -61,4 +61,4 @@ class perform_site_check(difference_detection_processor):
# Always record the new checksum # Always record the new checksum
update_obj["previous_md5"] = fetched_md5 update_obj["previous_md5"] = fetched_md5
return changed_detected, update_obj, self.fetcher.instock_data.encode('utf-8') return changed_detected, update_obj, self.fetcher.instock_data.encode('utf-8').strip()

View File

@@ -1,117 +1,131 @@
function isItemInStock() { function isItemInStock() {
// @todo Pass these in so the same list can be used in non-JS fetchers // @todo Pass these in so the same list can be used in non-JS fetchers
const outOfStockTexts = [ const outOfStockTexts = [
' أخبرني عندما يتوفر', ' أخبرني عندما يتوفر',
'0 in stock', '0 in stock',
'agotado', 'agotado',
'article épuisé', 'article épuisé',
'artikel zurzeit vergriffen', 'artikel zurzeit vergriffen',
'as soon as stock is available', 'as soon as stock is available',
'ausverkauft', // sold out 'ausverkauft', // sold out
'available for back order', 'available for back order',
'back-order or out of stock', 'back-order or out of stock',
'backordered', 'backordered',
'benachrichtigt mich', // notify me 'benachrichtigt mich', // notify me
'brak na stanie', 'brak na stanie',
'brak w magazynie', 'brak w magazynie',
'coming soon', 'coming soon',
'currently have any tickets for this', 'currently have any tickets for this',
'currently unavailable', 'currently unavailable',
'dostępne wkrótce', 'dostępne wkrótce',
'en rupture de stock', 'en rupture de stock',
'ist derzeit nicht auf lager', 'ist derzeit nicht auf lager',
'item is no longer available', 'item is no longer available',
'let me know when it\'s available', 'let me know when it\'s available',
'message if back in stock', 'message if back in stock',
'nachricht bei', 'nachricht bei',
'nicht auf lager', 'nicht auf lager',
'nicht lieferbar', 'nicht lieferbar',
'nicht zur verfügung', 'nicht zur verfügung',
'niet beschikbaar', 'niet beschikbaar',
'niet leverbaar', 'niet leverbaar',
'no disponible temporalmente', 'no disponible temporalmente',
'no longer in stock', 'no longer in stock',
'no tickets available', 'no tickets available',
'not available', 'not available',
'not currently available', 'not currently available',
'not in stock', 'not in stock',
'notify me when available', 'notify me when available',
'não estamos a aceitar encomendas', 'não estamos a aceitar encomendas',
'out of stock', 'out of stock',
'out-of-stock', 'out-of-stock',
'produkt niedostępny', 'produkt niedostępny',
'sold out', 'sold out',
'sold-out', 'sold-out',
'temporarily out of stock', 'temporarily out of stock',
'temporarily unavailable', 'temporarily unavailable',
'tickets unavailable', 'tickets unavailable',
'tijdelijk uitverkocht', 'tijdelijk uitverkocht',
'unavailable tickets', 'unavailable tickets',
'we do not currently have an estimate of when this product will be back in stock.', 'we do not currently have an estimate of when this product will be back in stock.',
'zur zeit nicht an lager', 'we don\'t know when or if this item will be back in stock.',
'品切れ', 'zur zeit nicht an lager',
'已售完', '品切れ',
'품절' '已售完',
]; '품절'
];
function getElementBaseText(element) {
const negateOutOfStockRegexs = [ // .textContent can include text from children which may give the wrong results
'[0-9] in stock' // scan only immediate TEXT_NODEs, which will be a child of the element
] var text = "";
var negateOutOfStockRegexs_r = []; for (var i = 0; i < element.childNodes.length; ++i)
for (let i = 0; i < negateOutOfStockRegexs.length; i++) { if (element.childNodes[i].nodeType === Node.TEXT_NODE)
negateOutOfStockRegexs_r.push(new RegExp(negateOutOfStockRegexs[0], 'g')); text += element.childNodes[i].textContent;
} return text.toLowerCase().trim();
const elementsWithZeroChildren = Array.from(document.getElementsByTagName('*')).filter(element => element.children.length === 0);
// REGEXS THAT REALLY MEAN IT'S IN STOCK
for (let i = elementsWithZeroChildren.length - 1; i >= 0; i--) {
const element = elementsWithZeroChildren[i];
if (element.offsetWidth > 0 || element.offsetHeight > 0 || element.getClientRects().length > 0) {
var elementText="";
if (element.tagName.toLowerCase() === "input") {
elementText = element.value.toLowerCase();
} else {
elementText = element.textContent.toLowerCase();
}
if (elementText.length) {
// try which ones could mean its in stock
for (let i = 0; i < negateOutOfStockRegexs.length; i++) {
if (negateOutOfStockRegexs_r[i].test(elementText)) {
return 'Possibly in stock';
}
}
}
} }
}
// OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK const negateOutOfStockRegexs = [
for (let i = elementsWithZeroChildren.length - 1; i >= 0; i--) { '[0-9] in stock'
const element = elementsWithZeroChildren[i]; ]
if (element.offsetWidth > 0 || element.offsetHeight > 0 || element.getClientRects().length > 0) { var negateOutOfStockRegexs_r = [];
var elementText=""; for (let i = 0; i < negateOutOfStockRegexs.length; i++) {
if (element.tagName.toLowerCase() === "input") { negateOutOfStockRegexs_r.push(new RegExp(negateOutOfStockRegexs[0], 'g'));
elementText = element.value.toLowerCase();
} else {
elementText = element.textContent.toLowerCase();
}
if (elementText.length) {
// and these mean its out of stock
for (const outOfStockText of outOfStockTexts) {
if (elementText.includes(outOfStockText)) {
return elementText; // item is out of stock
}
}
}
} }
}
return 'Possibly in stock'; // possibly in stock, cant decide otherwise. // The out-of-stock or in-stock-text is generally always above-the-fold
// and often below-the-fold is a list of related products that may or may not contain trigger text
// so it's good to filter to just the 'above the fold' elements
// and it should be at least 100px from the top to ignore items in the toolbar; sometimes menu items like "Coming soon" exist
const elementsToScan = Array.from(document.getElementsByTagName('*')).filter(element => element.getBoundingClientRect().top + window.scrollY <= window.innerHeight && element.getBoundingClientRect().top + window.scrollY >= 100);
var elementText = "";
// REGEXS THAT REALLY MEAN IT'S IN STOCK
for (let i = elementsToScan.length - 1; i >= 0; i--) {
const element = elementsToScan[i];
elementText = "";
if (element.tagName.toLowerCase() === "input") {
elementText = element.value.toLowerCase();
} else {
elementText = getElementBaseText(element);
}
if (elementText.length) {
// try which ones could mean its in stock
for (let i = 0; i < negateOutOfStockRegexs.length; i++) {
if (negateOutOfStockRegexs_r[i].test(elementText)) {
return 'Possibly in stock';
}
}
}
}
// OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK
for (let i = elementsToScan.length - 1; i >= 0; i--) {
const element = elementsToScan[i];
if (element.offsetWidth > 0 || element.offsetHeight > 0 || element.getClientRects().length > 0) {
elementText = "";
if (element.tagName.toLowerCase() === "input") {
elementText = element.value.toLowerCase();
} else {
elementText = getElementBaseText(element);
}
if (elementText.length) {
// and these mean its out of stock
for (const outOfStockText of outOfStockTexts) {
if (elementText.includes(outOfStockText)) {
return outOfStockText; // item is out of stock
}
}
}
}
}
return 'Possibly in stock'; // possibly in stock, cant decide otherwise.
} }
// returns the element text that makes it think it's out of stock // returns the element text that makes it think it's out of stock
return isItemInStock(); return isItemInStock().trim()