mirror of
				https://github.com/dgtlmoon/changedetection.io.git
				synced 2025-10-31 06:37:41 +00:00 
			
		
		
		
	Compare commits
	
		
			3 Commits
		
	
	
		
			sort-text-
			...
			update-ins
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | dc33d49840 | ||
|   | c30f96c4cd | ||
|   | c8310b7e93 | 
| @@ -409,23 +409,6 @@ def has_ldjson_product_info(content): | ||||
|     x=bool(pricing_data) | ||||
|     return x | ||||
|  | ||||
|  | ||||
| def workarounds_for_obfuscations(content): | ||||
|     """ | ||||
|     Some sites are using sneaky tactics to make prices and other information un-renderable by Inscriptis | ||||
|     This could go into its own Pip package in the future, for faster updates | ||||
|     """ | ||||
|  | ||||
|     # HomeDepot.com style <span>$<!-- -->90<!-- -->.<!-- -->74</span> | ||||
|     # https://github.com/weblyzard/inscriptis/issues/45 | ||||
|     if not content: | ||||
|         return content | ||||
|  | ||||
|     content = re.sub('<!--\s+-->', '', content) | ||||
|  | ||||
|     return content | ||||
|  | ||||
|  | ||||
| def get_triggered_text(content, trigger_text): | ||||
|     triggered_text = [] | ||||
|     result = strip_ignore_text(content=content, | ||||
|   | ||||
| @@ -151,7 +151,6 @@ class perform_site_check(difference_detection_processor): | ||||
|         if is_html or watch.is_source_type_url: | ||||
|  | ||||
|             # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text | ||||
|             self.fetcher.content = html_tools.workarounds_for_obfuscations(self.fetcher.content) | ||||
|             html_content = self.fetcher.content | ||||
|  | ||||
|             # If not JSON,  and if it's not text/plain.. | ||||
|   | ||||
| @@ -2,7 +2,7 @@ | ||||
|  | ||||
| import time | ||||
| from flask import url_for | ||||
| from .util import live_server_setup | ||||
| from .util import live_server_setup, wait_for_all_checks | ||||
|  | ||||
|  | ||||
| def set_original_ignore_response(): | ||||
| @@ -21,7 +21,7 @@ def set_original_ignore_response(): | ||||
| def test_obfuscations(client, live_server): | ||||
|     set_original_ignore_response() | ||||
|     live_server_setup(live_server) | ||||
|     time.sleep(1) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
| @@ -32,12 +32,12 @@ def test_obfuscations(client, live_server): | ||||
|     assert b"1 Imported" in res.data | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(3) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Check HTML conversion detected and worked | ||||
|     res = client.get( | ||||
|         url_for("preview_page", uuid="first"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     # whitespace appears but it renders https://github.com/weblyzard/inscriptis/issues/45#issuecomment-1923339265 | ||||
|     assert b'$90.74' in res.data | ||||
|   | ||||
| @@ -8,7 +8,7 @@ flask_expects_json~=1.7 | ||||
| flask_restful | ||||
| flask_wtf~=1.2 | ||||
| flask~=2.3 | ||||
| inscriptis~=2.2 | ||||
| inscriptis~=2.4 | ||||
| pytz | ||||
| timeago~=1.0 | ||||
| validators~=0.21 | ||||
|   | ||||
		Reference in New Issue
	
	Block a user