mirror of
				https://github.com/dgtlmoon/changedetection.io.git
				synced 2025-10-31 14:47:21 +00:00 
			
		
		
		
	Compare commits
	
		
			4 Commits
		
	
	
		
			3509-pip-a
			...
			custom-res
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | c93a3470a9 | ||
|   | 57c83e868d | ||
|   | ddbbe1ddee | ||
|   | 1a2e9309ed | 
| @@ -199,6 +199,14 @@ nav | ||||
|                         </ul> | ||||
|                      </span> | ||||
|                     </fieldset> | ||||
|                     <fieldset class="pure-group"> | ||||
|                         {{ render_field(form.application.form.custom_outofstock_strings) }} | ||||
|                         <span class="pure-form-message-inline">Additional custom out-of-stock detection strings (one per line).</span> | ||||
|                         </fieldset> | ||||
|                         <fieldset class="pure-group"> | ||||
|                         {{ render_field(form.application.form.custom_instock_strings) }} | ||||
|                         <span class="pure-form-message-inline">Additional custom in-stock detection strings (one per line).</span> | ||||
|                     </fieldset> | ||||
|            </div> | ||||
|  | ||||
|             <div class="tab-pane-inner" id="api"> | ||||
|   | ||||
| @@ -1,8 +1,8 @@ | ||||
| async () => { | ||||
| async (customOutOfStockStrings = []) => { | ||||
|  | ||||
|     function isItemInStock() { | ||||
|         // @todo Pass these in so the same list can be used in non-JS fetchers | ||||
|         const outOfStockTexts = [ | ||||
|         const builtInOutOfStockTexts = [ | ||||
|             ' أخبرني عندما يتوفر', | ||||
|             '0 in stock', | ||||
|             'actuellement indisponible', | ||||
| @@ -110,6 +110,9 @@ async () => { | ||||
|             '품절' | ||||
|         ]; | ||||
|  | ||||
|         // Combine built-in strings with custom strings provided by user | ||||
|         const outOfStockTexts = [...builtInOutOfStockTexts, ...customOutOfStockStrings]; | ||||
|  | ||||
|  | ||||
|         const vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0); | ||||
|  | ||||
|   | ||||
| @@ -774,6 +774,20 @@ class globalSettingsApplicationForm(commonSettingsForm): | ||||
|                                                                                                      message="Should contain zero or more attempts")]) | ||||
|     ui = FormField(globalSettingsApplicationUIForm) | ||||
|  | ||||
|     #@todo better validations? | ||||
|  | ||||
|     custom_outofstock_strings = StringListField('Custom out-of-stock detection strings', | ||||
|                                               [validators.Optional()], | ||||
|                                               render_kw={ | ||||
|                                                   "placeholder": "Enter custom out-of-stock strings, one per line\nExample:\nPronto estarán en stock!\nTemporarily out of stock", | ||||
|                                                   "rows": "3"}) | ||||
|  | ||||
|     custom_instock_strings = StringListField('Custom in-stock detection strings', | ||||
|                                            [validators.Optional()], | ||||
|                                            render_kw={ | ||||
|                                                "placeholder": "Enter custom in-stock strings, one per line\nExample:\nDisponible ahora\nIn voorraad", | ||||
|                                                "rows": "3"}) | ||||
|  | ||||
|  | ||||
| class globalSettingsForm(Form): | ||||
|     # Define these as FormFields/"sub forms", this way it matches the JSON storage | ||||
|   | ||||
| @@ -38,6 +38,8 @@ class model(dict): | ||||
|                     # Custom notification content | ||||
|                     'api_access_token_enabled': True, | ||||
|                     'base_url' : None, | ||||
|                     'custom_instock_strings': [], | ||||
|                     'custom_outofstock_strings' : [], | ||||
|                     'empty_pages_are_a_change': False, | ||||
|                     'extract_title_as_title': False, | ||||
|                     'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"), | ||||
|   | ||||
| @@ -1,7 +1,8 @@ | ||||
| from wtforms import ( | ||||
|     BooleanField, | ||||
|     validators, | ||||
|     FloatField | ||||
|     FloatField, | ||||
|     TextAreaField | ||||
| ) | ||||
| from wtforms.fields.choices import RadioField | ||||
| from wtforms.fields.form import FormField | ||||
| @@ -29,6 +30,7 @@ class RestockSettingsForm(Form): | ||||
|  | ||||
|     follow_price_changes = BooleanField('Follow price changes', default=True) | ||||
|  | ||||
|  | ||||
| class processor_settings_form(processor_text_json_diff_form): | ||||
|     restock_settings = FormField(RestockSettingsForm) | ||||
|  | ||||
| @@ -74,7 +76,7 @@ class processor_settings_form(processor_text_json_diff_form): | ||||
|                     {{ render_field(form.restock_settings.price_change_threshold_percent) }} | ||||
|                     <span class="pure-form-message-inline">Price must change more than this % to trigger a change since the first check.</span><br> | ||||
|                     <span class="pure-form-message-inline">For example, If the product is $1,000 USD originally, <strong>2%</strong> would mean it has to change more than $20 since the first check.</span><br> | ||||
|                 </fieldset>                 | ||||
|                 </fieldset>            | ||||
|             </div> | ||||
|         </fieldset> | ||||
|         """ | ||||
|   | ||||
| @@ -143,6 +143,89 @@ def is_between(number, lower=None, upper=None): | ||||
| class perform_site_check(difference_detection_processor): | ||||
|     screenshot = None | ||||
|     xpath_data = None | ||||
|      | ||||
|     def _normalize_text_for_matching(self, text): | ||||
|         """ | ||||
|         Normalize text for more robust matching: | ||||
|         - Convert to lowercase | ||||
|         - Remove accents/diacritics   | ||||
|         - Normalize whitespace | ||||
|         """ | ||||
|         import unicodedata | ||||
|         import re | ||||
|          | ||||
|         if not text: | ||||
|             return "" | ||||
|              | ||||
|         # Convert to lowercase | ||||
|         text = text.lower() | ||||
|          | ||||
|         # Remove accents/diacritics (NFD normalization + filter) | ||||
|         # This converts "é" to "e", "ñ" to "n", etc. | ||||
|         text = unicodedata.normalize('NFD', text) | ||||
|         text = ''.join(char for char in text if unicodedata.category(char) != 'Mn') | ||||
|          | ||||
|         # Normalize whitespace (replace multiple spaces/tabs/newlines with single space) | ||||
|         text = re.sub(r'\s+', ' ', text).strip() | ||||
|          | ||||
|         return text | ||||
|  | ||||
|     def _check_custom_strings(self, text_to_check, custom_strings, string_type="out-of-stock"): | ||||
|         """ | ||||
|         Check text against custom strings (either in-stock or out-of-stock). | ||||
|         Uses normalized matching for better international support. | ||||
|         Returns the matched string if found, None otherwise. | ||||
|         """ | ||||
|         if not custom_strings: | ||||
|             return None | ||||
|              | ||||
|         # Split custom strings by newlines and clean them up | ||||
|         raw_custom_list = [s.strip() for s in custom_strings.split('\n') if s.strip()] | ||||
|          | ||||
|         if not raw_custom_list: | ||||
|             return None | ||||
|              | ||||
|         # Normalize both the page text and custom strings for matching | ||||
|         normalized_text = self._normalize_text_for_matching(text_to_check) | ||||
|          | ||||
|         # Check each custom string against the text | ||||
|         for original_custom_text in raw_custom_list: | ||||
|             normalized_custom_text = self._normalize_text_for_matching(original_custom_text) | ||||
|              | ||||
|             if normalized_custom_text and normalized_custom_text in normalized_text: | ||||
|                 logger.debug(f"Custom {string_type} string found: '{original_custom_text}' (normalized: '{normalized_custom_text}')") | ||||
|                 return original_custom_text  # Return the original user-provided string | ||||
|                  | ||||
|         return None | ||||
|      | ||||
|     def _get_combined_instock_strings(self, restock_settings): | ||||
|         """ | ||||
|         Get combined list of built-in and custom in-stock strings. | ||||
|         Custom strings are normalized for better matching. | ||||
|         """ | ||||
|         # Built-in in-stock strings (from the TODO line) | ||||
|         builtin_instock_strings = [ | ||||
|             'instock', | ||||
|             'instoreonly',  | ||||
|             'limitedavailability', | ||||
|             'onlineonly', | ||||
|             'presale' | ||||
|         ] | ||||
|          | ||||
|         # Add custom in-stock strings if provided | ||||
|         custom_strings = restock_settings.get('custom_instock_strings', '').strip() | ||||
|         if custom_strings: | ||||
|             # Normalize custom strings for better matching | ||||
|             custom_list = [] | ||||
|             for s in custom_strings.split('\n'): | ||||
|                 s = s.strip() | ||||
|                 if s: | ||||
|                     normalized = self._normalize_text_for_matching(s) | ||||
|                     if normalized: | ||||
|                         custom_list.append(normalized) | ||||
|             builtin_instock_strings.extend(custom_list) | ||||
|              | ||||
|         return builtin_instock_strings | ||||
|  | ||||
|     def run_changedetection(self, watch): | ||||
|         import hashlib | ||||
| @@ -205,6 +288,7 @@ class perform_site_check(difference_detection_processor): | ||||
|  | ||||
|             if itemprop_availability.get('availability'): | ||||
|                 # @todo: Configurable? | ||||
|  | ||||
|                 if any(substring.lower() in itemprop_availability['availability'].lower() for substring in [ | ||||
|                     'instock', | ||||
|                     'instoreonly', | ||||
| @@ -238,6 +322,8 @@ class perform_site_check(difference_detection_processor): | ||||
|         if self.fetcher.instock_data and itemprop_availability.get('availability') is None: | ||||
|             # 'Possibly in stock' comes from stock-not-in-stock.js when no string found above the fold. | ||||
|             # Careful! this does not really come from chrome/js when the watch is set to plaintext | ||||
|             stock_detection_result = self.fetcher.instock_data | ||||
|  | ||||
|             update_obj['restock']["in_stock"] = True if self.fetcher.instock_data == 'Possibly in stock' else False | ||||
|             logger.debug(f"Watch UUID {watch.get('uuid')} restock check returned instock_data - '{self.fetcher.instock_data}' from JS scraper.") | ||||
|  | ||||
|   | ||||
| @@ -111,3 +111,130 @@ def test_restock_detection(client, live_server, measure_memory_usage): | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'not-in-stock' in res.data, "Correctly showing NOT IN STOCK in the list after it changed from IN STOCK" | ||||
|  | ||||
|  | ||||
| def test_restock_custom_strings(client, live_server): | ||||
|     """Test custom out-of-stock strings feature""" | ||||
|      | ||||
|     # Set up a response with custom out-of-stock text | ||||
|     test_return_data = """<html> | ||||
|        <body> | ||||
|        Some initial text<br> | ||||
|        <p>Which is across multiple lines</p> | ||||
|        <br> | ||||
|        So let's see what happens.  <br> | ||||
|        <div>price: $10.99</div> | ||||
|        <div id="custom">Pronto estarán en stock!</div> | ||||
|        </body> | ||||
|        </html> | ||||
|     """ | ||||
|      | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(test_return_data) | ||||
|      | ||||
|     test_url = url_for('test_endpoint', _external=True).replace('http://localhost', 'http://changedet') | ||||
|  | ||||
|     # Add watch with custom out-of-stock strings | ||||
|     res = client.post( | ||||
|         url_for("ui.ui_views.form_quick_watch_add"), | ||||
|         data={"url": test_url, "tags": '', 'processor': 'restock_diff'}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|      | ||||
|     # Get the UUID so we can configure the watch | ||||
|     uuid = extract_UUID_from_client(client) | ||||
|      | ||||
|     # Configure custom out-of-stock strings | ||||
|     res = client.post( | ||||
|         url_for("ui.ui_edit.edit_page", uuid=uuid, unpause_on_save=1), | ||||
|         data={ | ||||
|             "url": test_url, | ||||
|             'processor': 'restock_diff', | ||||
|             'restock_settings-custom_outofstock_strings': 'Pronto estarán en stock!\nCustom unavailable message' | ||||
|         }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
|      | ||||
|     # Check that it detects as out of stock | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'not-in-stock' in res.data, "Should detect custom out-of-stock string" | ||||
|      | ||||
|     # Test custom in-stock strings by changing the content | ||||
|     test_return_data_instock = """<html> | ||||
|        <body> | ||||
|        Some initial text<br> | ||||
|        <p>Which is across multiple lines</p> | ||||
|        <br> | ||||
|        So let's see what happens.  <br> | ||||
|        <div>price: $10.99</div> | ||||
|        <div id="custom">Disponible ahora</div> | ||||
|        </body> | ||||
|        </html> | ||||
|     """ | ||||
|      | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(test_return_data_instock) | ||||
|      | ||||
|     # Update the watch to include custom in-stock strings | ||||
|     res = client.post( | ||||
|         url_for("ui.ui_edit.edit_page", uuid=uuid, unpause_on_save=1), | ||||
|         data={ | ||||
|             "url": test_url, | ||||
|             'processor': 'restock_diff', | ||||
|             'restock_settings-custom_outofstock_strings': 'Pronto estarán en stock!\nCustom unavailable message', | ||||
|             'restock_settings-custom_instock_strings': 'Disponible ahora\nIn voorraad' | ||||
|         }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
|      | ||||
|     # Check again - should be detected as in stock now | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'not-in-stock' not in res.data, "Should detect custom in-stock string and show as available" | ||||
|  | ||||
|  | ||||
| def test_restock_custom_strings_normalization(client, live_server): | ||||
|     """Test key normalization scenarios: accents, case, and spaces""" | ||||
|      | ||||
|     # Test page with Spanish text with accents and mixed case | ||||
|     test_return_data = """<html> | ||||
|        <body> | ||||
|        <div>price: $10.99</div> | ||||
|        <div id="status">¡TEMPORALMENTE    AGOTADO!</div> | ||||
|        </body> | ||||
|        </html> | ||||
|     """ | ||||
|      | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(test_return_data) | ||||
|      | ||||
|     test_url = url_for('test_endpoint', _external=True).replace('http://localhost', 'http://changedet') | ||||
|      | ||||
|     # Add watch | ||||
|     res = client.post( | ||||
|         url_for("ui.ui_views.form_quick_watch_add"), | ||||
|         data={"url": test_url, "tags": '', 'processor': 'restock_diff'}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|      | ||||
|     uuid = extract_UUID_from_client(client) | ||||
|      | ||||
|     # Configure custom string without accents, lowercase, no extra spaces | ||||
|     res = client.post( | ||||
|         url_for("ui.ui_edit.edit_page", uuid=uuid, unpause_on_save=1), | ||||
|         data={ | ||||
|             "url": test_url, | ||||
|             'processor': 'restock_diff', | ||||
|             'restock_settings-custom_outofstock_strings': 'temporalmente agotado' | ||||
|         }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|      | ||||
|     # Should detect as out of stock despite text differences | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'not-in-stock' in res.data, "Should match despite accents, case, and spacing differences" | ||||
|  | ||||
|   | ||||
| @@ -0,0 +1,95 @@ | ||||
| #!/usr/bin/env python3 | ||||
|  | ||||
| import unittest | ||||
| from changedetectionio.processors.restock_diff.processor import perform_site_check | ||||
|  | ||||
|  | ||||
| class TestCustomStringNormalization(unittest.TestCase): | ||||
|     """Test the text normalization logic for custom out-of-stock strings""" | ||||
|      | ||||
|     def setUp(self): | ||||
|         # Create a processor instance for testing | ||||
|         self.processor = perform_site_check(datastore=None, watch_uuid='test') | ||||
|      | ||||
|     def test_normalize_text_for_matching(self): | ||||
|         """Test the _normalize_text_for_matching method""" | ||||
|          | ||||
|         test_cases = [ | ||||
|             # (input, expected_output) | ||||
|             ("Agotado", "agotado"), | ||||
|             ("AGOTADO", "agotado"),  # Lowercase | ||||
|             ("Sin   stock!", "sin stock!"),  # Normalize whitespace | ||||
|             ("Pronto\t\nestarán\nen stock", "pronto estaran en stock"),  # Multiple whitespace types + accents | ||||
|             ("¡Temporalmente  AGOTADO!", "¡temporalmente agotado!"),  # Complex case | ||||
|             ("", ""),  # Empty string | ||||
|             ("café", "cafe"),  # French accent | ||||
|             ("naïve", "naive"),  # Multiple accents | ||||
|         ] | ||||
|          | ||||
|         for input_text, expected in test_cases: | ||||
|             with self.subTest(input_text=input_text): | ||||
|                 result = self.processor._normalize_text_for_matching(input_text) | ||||
|                 self.assertEqual(result, expected,  | ||||
|                     f"Failed to normalize '{input_text}' -> expected '{expected}', got '{result}'") | ||||
|      | ||||
|     def test_check_custom_strings_normalization(self): | ||||
|         """Test that custom string matching works with normalization""" | ||||
|          | ||||
|         test_cases = [ | ||||
|             # (page_text, custom_strings, should_match, description) | ||||
|             ("AGOTADO", "agotado", True, "uppercase to lowercase"), | ||||
|             ("Agotado", "agotado", True, "single uppercase to lowercase"), | ||||
|             ("Sin   stock!", "sin stock", True, "multiple spaces normalized"), | ||||
|             ("¡Pronto    estarán   en stock!", "pronto estaran en stock", True, "accents + spaces"), | ||||
|             ("TEMPORALMENTE AGOTADO", "temporalmente agotado", True, "multi-word uppercase"), | ||||
|             ("Available now", "agotado", False, "no match case"), | ||||
|             ("", "agotado", False, "empty text"), | ||||
|             ("agotado", "", False, "empty custom strings"), | ||||
|         ] | ||||
|          | ||||
|         for page_text, custom_strings, should_match, description in test_cases: | ||||
|             with self.subTest(description=description): | ||||
|                 result = self.processor._check_custom_strings(page_text, custom_strings, "out-of-stock") | ||||
|                  | ||||
|                 if should_match: | ||||
|                     self.assertIsNotNone(result,  | ||||
|                         f"Expected match for '{description}': '{page_text}' should match '{custom_strings}'") | ||||
|                 else: | ||||
|                     self.assertIsNone(result,  | ||||
|                         f"Expected no match for '{description}': '{page_text}' should not match '{custom_strings}'") | ||||
|      | ||||
|     def test_check_custom_strings_multiline(self): | ||||
|         """Test that multi-line custom strings work properly""" | ||||
|          | ||||
|         page_text = "Product status: TEMPORALMENTE AGOTADO" | ||||
|         custom_strings = """ | ||||
|         sin stock | ||||
|         agotado | ||||
|         temporalmente agotado | ||||
|         """ | ||||
|          | ||||
|         result = self.processor._check_custom_strings(page_text, custom_strings, "out-of-stock") | ||||
|         self.assertIsNotNone(result) | ||||
|         self.assertEqual(result.strip(), "temporalmente agotado") | ||||
|      | ||||
|     def test_get_combined_instock_strings_normalization(self): | ||||
|         """Test that custom in-stock strings are normalized properly""" | ||||
|          | ||||
|         restock_settings = { | ||||
|             'custom_instock_strings': 'Disponible AHORA\nEn Stock\nDISPONÍBLE' | ||||
|         } | ||||
|          | ||||
|         result = self.processor._get_combined_instock_strings(restock_settings) | ||||
|          | ||||
|         # Check that built-in strings are included | ||||
|         self.assertIn('instock', result) | ||||
|         self.assertIn('presale', result) | ||||
|          | ||||
|         # Check that custom strings are normalized and included | ||||
|         self.assertIn('disponible ahora', result) | ||||
|         self.assertIn('en stock', result) | ||||
|         self.assertIn('disponible', result)  # accent removed | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
		Reference in New Issue
	
	Block a user