mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2025-11-01 15:17:20 +00:00
Compare commits
1 Commits
reverse-26
...
restock-pr
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8bad0b06ff |
@@ -143,6 +143,8 @@ class perform_site_check(difference_detection_processor):
|
||||
def run_changedetection(self, watch, skip_when_checksum_same=True):
|
||||
import hashlib
|
||||
|
||||
from concurrent.futures import ProcessPoolExecutor
|
||||
from functools import partial
|
||||
if not watch:
|
||||
raise Exception("Watch no longer exists.")
|
||||
|
||||
@@ -184,7 +186,11 @@ class perform_site_check(difference_detection_processor):
|
||||
|
||||
itemprop_availability = {}
|
||||
try:
|
||||
itemprop_availability = get_itemprop_availability(self.fetcher.content)
|
||||
with ProcessPoolExecutor() as executor:
|
||||
# Use functools.partial to create a callable with arguments
|
||||
# anything using bs4/lxml etc is quite "leaky"
|
||||
future = executor.submit(partial(get_itemprop_availability, self.fetcher.content))
|
||||
itemprop_availability = future.result()
|
||||
except MoreThanOnePriceFound as e:
|
||||
# Add the real data
|
||||
raise ProcessorException(message="Cannot run, more than one price detected, this plugin is only for product pages with ONE product, try the content-change detection mode.",
|
||||
|
||||
@@ -36,6 +36,8 @@ class PDFToHTMLToolNotFound(ValueError):
|
||||
class perform_site_check(difference_detection_processor):
|
||||
|
||||
def run_changedetection(self, watch, skip_when_checksum_same=True):
|
||||
from concurrent.futures import ProcessPoolExecutor
|
||||
from functools import partial
|
||||
|
||||
changed_detected = False
|
||||
html_content = ""
|
||||
@@ -172,20 +174,30 @@ class perform_site_check(difference_detection_processor):
|
||||
for filter_rule in include_filters_rule:
|
||||
# For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
|
||||
if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):
|
||||
html_content += html_tools.xpath_filter(xpath_filter=filter_rule.replace('xpath:', ''),
|
||||
with ProcessPoolExecutor() as executor:
|
||||
# Use functools.partial to create a callable with arguments - anything using bs4/lxml etc is quite "leaky"
|
||||
future = executor.submit(partial(html_tools.xpath_filter, xpath_filter=filter_rule.replace('xpath:', ''),
|
||||
html_content=self.fetcher.content,
|
||||
append_pretty_line_formatting=not watch.is_source_type_url,
|
||||
is_rss=is_rss)
|
||||
is_rss=is_rss))
|
||||
html_content += future.result()
|
||||
|
||||
elif filter_rule.startswith('xpath1:'):
|
||||
html_content += html_tools.xpath1_filter(xpath_filter=filter_rule.replace('xpath1:', ''),
|
||||
html_content=self.fetcher.content,
|
||||
append_pretty_line_formatting=not watch.is_source_type_url,
|
||||
is_rss=is_rss)
|
||||
with ProcessPoolExecutor() as executor:
|
||||
# Use functools.partial to create a callable with arguments - anything using bs4/lxml etc is quite "leaky"
|
||||
future = executor.submit(partial(html_tools.xpath1_filter, xpath_filter=filter_rule.replace('xpath1:', ''),
|
||||
html_content=self.fetcher.content,
|
||||
append_pretty_line_formatting=not watch.is_source_type_url,
|
||||
is_rss=is_rss))
|
||||
html_content += future.result()
|
||||
else:
|
||||
html_content += html_tools.include_filters(include_filters=filter_rule,
|
||||
with ProcessPoolExecutor() as executor:
|
||||
# Use functools.partial to create a callable with arguments - anything using bs4/lxml etc is quite "leaky"
|
||||
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
|
||||
future = executor.submit(partial(html_tools.include_filters, include_filters=filter_rule,
|
||||
html_content=self.fetcher.content,
|
||||
append_pretty_line_formatting=not watch.is_source_type_url)
|
||||
append_pretty_line_formatting=not watch.is_source_type_url))
|
||||
html_content += future.result()
|
||||
|
||||
if not html_content.strip():
|
||||
raise FilterNotFoundInResponse(msg=include_filters_rule, screenshot=self.fetcher.screenshot, xpath_data=self.fetcher.xpath_data)
|
||||
@@ -198,9 +210,13 @@ class perform_site_check(difference_detection_processor):
|
||||
else:
|
||||
# extract text
|
||||
do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
|
||||
stripped_text_from_html = html_tools.html_to_text(html_content=html_content,
|
||||
render_anchor_tag_content=do_anchor,
|
||||
is_rss=is_rss) # 1874 activate the <title workaround hack
|
||||
with ProcessPoolExecutor() as executor:
|
||||
# Use functools.partial to create a callable with arguments - anything using bs4/lxml etc is quite "leaky"
|
||||
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
|
||||
future = executor.submit(partial(html_tools.html_to_text, html_content=html_content,
|
||||
render_anchor_tag_content=do_anchor,
|
||||
is_rss=is_rss)) #1874 activate the <title workaround hack
|
||||
stripped_text_from_html = future.result()
|
||||
|
||||
|
||||
if watch.get('trim_text_whitespace'):
|
||||
|
||||
@@ -16,31 +16,25 @@ echo "---------------------------------- SOCKS5 -------------------"
|
||||
docker run --network changedet-network \
|
||||
-v `pwd`/tests/proxy_socks5/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json \
|
||||
--rm \
|
||||
-e "FLASK_SERVER_NAME=cdio" \
|
||||
--hostname cdio \
|
||||
-e "SOCKSTEST=proxiesjson" \
|
||||
test-changedetectionio \
|
||||
bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy_sources.py'
|
||||
bash -c 'cd changedetectionio && pytest tests/proxy_socks5/test_socks5_proxy_sources.py'
|
||||
|
||||
# SOCKS5 related - by manually entering in UI
|
||||
docker run --network changedet-network \
|
||||
--rm \
|
||||
-e "FLASK_SERVER_NAME=cdio" \
|
||||
--hostname cdio \
|
||||
-e "SOCKSTEST=manual" \
|
||||
test-changedetectionio \
|
||||
bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy.py'
|
||||
bash -c 'cd changedetectionio && pytest tests/proxy_socks5/test_socks5_proxy.py'
|
||||
|
||||
# SOCKS5 related - test from proxies.json via playwright - NOTE- PLAYWRIGHT DOESNT SUPPORT AUTHENTICATING PROXY
|
||||
docker run --network changedet-network \
|
||||
-e "SOCKSTEST=manual-playwright" \
|
||||
--hostname cdio \
|
||||
-e "FLASK_SERVER_NAME=cdio" \
|
||||
-v `pwd`/tests/proxy_socks5/proxies.json-example-noauth:/app/changedetectionio/test-datastore/proxies.json \
|
||||
-e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" \
|
||||
--rm \
|
||||
test-changedetectionio \
|
||||
bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy_sources.py'
|
||||
bash -c 'cd changedetectionio && pytest tests/proxy_socks5/test_socks5_proxy_sources.py'
|
||||
|
||||
echo "socks5 server logs"
|
||||
docker logs socks5proxy
|
||||
|
||||
@@ -1,27 +1,12 @@
|
||||
#!/usr/bin/env python3
|
||||
import os
|
||||
import time
|
||||
from flask import url_for
|
||||
from changedetectionio.tests.util import live_server_setup, wait_for_all_checks
|
||||
|
||||
|
||||
def set_response():
|
||||
import time
|
||||
data = f"""<html>
|
||||
<body>
|
||||
<h1>Awesome, you made it</h1>
|
||||
yeah the socks request worked
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
with open("test-datastore/endpoint-content.txt", "w") as f:
|
||||
f.write(data)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
def test_socks5(client, live_server, measure_memory_usage):
|
||||
live_server_setup(live_server)
|
||||
set_response()
|
||||
|
||||
# Setup a proxy
|
||||
res = client.post(
|
||||
@@ -39,10 +24,7 @@ def test_socks5(client, live_server, measure_memory_usage):
|
||||
|
||||
assert b"Settings updated." in res.data
|
||||
|
||||
# Because the socks server should connect back to us
|
||||
test_url = url_for('test_endpoint', _external=True) + f"?socks-test-tag={os.getenv('SOCKSTEST', '')}"
|
||||
test_url = test_url.replace('localhost.localdomain', 'cdio')
|
||||
test_url = test_url.replace('localhost', 'cdio')
|
||||
test_url = "https://changedetection.io/CHANGELOG.txt?socks-test-tag=" + os.getenv('SOCKSTEST', '')
|
||||
|
||||
res = client.post(
|
||||
url_for("form_quick_watch_add"),
|
||||
@@ -78,4 +60,4 @@ def test_socks5(client, live_server, measure_memory_usage):
|
||||
)
|
||||
|
||||
# Should see the proper string
|
||||
assert "Awesome, you made it".encode('utf-8') in res.data
|
||||
assert "+0200:".encode('utf-8') in res.data
|
||||
|
||||
@@ -1,32 +1,16 @@
|
||||
#!/usr/bin/env python3
|
||||
import os
|
||||
import time
|
||||
from flask import url_for
|
||||
from changedetectionio.tests.util import live_server_setup, wait_for_all_checks
|
||||
|
||||
|
||||
def set_response():
|
||||
import time
|
||||
data = f"""<html>
|
||||
<body>
|
||||
<h1>Awesome, you made it</h1>
|
||||
yeah the socks request worked
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
with open("test-datastore/endpoint-content.txt", "w") as f:
|
||||
f.write(data)
|
||||
time.sleep(1)
|
||||
|
||||
# should be proxies.json mounted from run_proxy_tests.sh already
|
||||
# -v `pwd`/tests/proxy_socks5/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json
|
||||
def test_socks5_from_proxiesjson_file(client, live_server, measure_memory_usage):
|
||||
live_server_setup(live_server)
|
||||
set_response()
|
||||
# Because the socks server should connect back to us
|
||||
test_url = url_for('test_endpoint', _external=True) + f"?socks-test-tag={os.getenv('SOCKSTEST', '')}"
|
||||
test_url = test_url.replace('localhost.localdomain', 'cdio')
|
||||
test_url = test_url.replace('localhost', 'cdio')
|
||||
|
||||
test_url = "https://changedetection.io/CHANGELOG.txt?socks-test-tag=" + os.getenv('SOCKSTEST', '')
|
||||
|
||||
res = client.get(url_for("settings_page"))
|
||||
assert b'name="requests-proxy" type="radio" value="socks5proxy"' in res.data
|
||||
@@ -65,4 +49,4 @@ def test_socks5_from_proxiesjson_file(client, live_server, measure_memory_usage)
|
||||
)
|
||||
|
||||
# Should see the proper string
|
||||
assert "Awesome, you made it".encode('utf-8') in res.data
|
||||
assert "+0200:".encode('utf-8') in res.data
|
||||
|
||||
Reference in New Issue
Block a user