mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-06-22 08:31:16 +00:00
Compare commits
9 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 59fa8db18f | |||
| f3c7c969d8 | |||
| 1355c2a245 | |||
| 96cf1a06df | |||
| 019a4a0375 | |||
| db2f7b80ea | |||
| bfabd7b094 | |||
| d92dbfe765 | |||
| 09685c62ab |
@@ -7,6 +7,20 @@ assignees: 'dgtlmoon'
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
**DO NOT USE THIS FORM TO REPORT THAT A PARTICULAR WEBSITE IS NOT SCRAPING/WATCHING AS EXPECTED**
|
||||||
|
|
||||||
|
This form is only for direct bugs and feature requests to do directly with the software.
|
||||||
|
|
||||||
|
Please report watched websites (full URL and _any_ settings) that do not work with changedetection.io as expected [**IN THE DISCUSSION FORUMS**](https://github.com/dgtlmoon/changedetection.io/discussions) or your report will be deleted.
|
||||||
|
|
||||||
|
CONSIDER TAKING OUT A SUBSCRIPTION FOR A SMALL PRICE PER MONTH, YOU GET THE BENEFIT OF USING OUR PAID PROXIES AND FURTHERING THE DEVELOPMENT OF CHANGEDETECTION.IO
|
||||||
|
|
||||||
|
THANK YOU
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
**Describe the bug**
|
**Describe the bug**
|
||||||
A clear and concise description of what the bug is.
|
A clear and concise description of what the bug is.
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
## Web Site Change Detection, Monitoring and Notification.
|
## Web Site Change Detection, Monitoring and Notification.
|
||||||
|
|
||||||
[**Try our $6.99/month subscription - Unlimited checks and watches!**](https://lemonade.changedetection.io/start)
|
Live your data-life pro-actively, track website content changes and receive notifications via Discord, Email, Slack, Telegram and 70+ more
|
||||||
|
|
||||||
|
|
||||||
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring" title="Self-hosted web page change monitoring" />](https://lemonade.changedetection.io/start)
|
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring" title="Self-hosted web page change monitoring" />](https://lemonade.changedetection.io/start)
|
||||||
|
|
||||||
@@ -11,7 +10,7 @@
|
|||||||
|
|
||||||
Know when important content changes, we support notifications via Discord, Telegram, Home-Assistant, Slack, Email and 70+ more
|
Know when important content changes, we support notifications via Discord, Telegram, Home-Assistant, Slack, Email and 70+ more
|
||||||
|
|
||||||
[**Try our $6.99/month subscription - unlimited checks and watches!**](https://lemonade.changedetection.io/start) , _half the price of other website change monitoring services and comes with unlimited watches & checks!_
|
[**Don't have time? Let us host it for you! Try our $6.99/month subscription - use our proxies and support!**](https://lemonade.changedetection.io/start) , _half the price of other website change monitoring services and comes with unlimited watches & checks!_
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -40,7 +39,18 @@ Know when important content changes, we support notifications via Discord, Teleg
|
|||||||
- Monitor HTML source code for unexpected changes, strengthen your PCI compliance
|
- Monitor HTML source code for unexpected changes, strengthen your PCI compliance
|
||||||
- You have a very sensitive list of URLs to watch and you do _not_ want to use the paid alternatives. (Remember, _you_ are the product)
|
- You have a very sensitive list of URLs to watch and you do _not_ want to use the paid alternatives. (Remember, _you_ are the product)
|
||||||
|
|
||||||
_Need an actual Chrome runner with Javascript support? We support fetching via WebDriver!_
|
_Need an actual Chrome runner with Javascript support? We support fetching via WebDriver and Playwright!_
|
||||||
|
|
||||||
|
#### Key Features
|
||||||
|
|
||||||
|
- Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions!
|
||||||
|
- Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JsonPath rules
|
||||||
|
- Switch between fast non-JS and Chrome JS based "fetchers"
|
||||||
|
- Easily specify how often a site should be checked
|
||||||
|
- Execute JS before extracting text (Good for logging in, see examples in the UI!)
|
||||||
|
- Override Request Headers, Specify `POST` or `GET` and other methods
|
||||||
|
- Use the "Visual Selector" to help target specific elements
|
||||||
|
|
||||||
|
|
||||||
## Screenshots
|
## Screenshots
|
||||||
|
|
||||||
|
|||||||
@@ -4,8 +4,6 @@ from typing import List
|
|||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from jsonpath_ng.ext import parse
|
from jsonpath_ng.ext import parse
|
||||||
import re
|
import re
|
||||||
from inscriptis import get_text
|
|
||||||
from inscriptis.model.config import ParserConfig
|
|
||||||
|
|
||||||
class FilterNotFoundInResponse(ValueError):
|
class FilterNotFoundInResponse(ValueError):
|
||||||
def __init__(self, msg):
|
def __init__(self, msg):
|
||||||
@@ -190,9 +188,16 @@ def strip_ignore_text(content, wordlist, mode="content"):
|
|||||||
|
|
||||||
|
|
||||||
def html_to_text(html_content: str, render_anchor_tag_content=False) -> str:
|
def html_to_text(html_content: str, render_anchor_tag_content=False) -> str:
|
||||||
|
import multiprocessing
|
||||||
|
|
||||||
|
from inscriptis.model.config import ParserConfig
|
||||||
|
|
||||||
"""Converts html string to a string with just the text. If ignoring
|
"""Converts html string to a string with just the text. If ignoring
|
||||||
rendering anchor tag content is enable, anchor tag content are also
|
rendering anchor tag content is enable, anchor tag content are also
|
||||||
included in the text
|
included in the text
|
||||||
|
|
||||||
|
@NOTE: HORRIBLE LXML INDUCED MEMORY LEAK WORKAROUND HERE
|
||||||
|
https://www.reddit.com/r/Python/comments/j0gl8t/psa_pythonlxml_memory_leaks_and_a_solution/
|
||||||
|
|
||||||
:param html_content: string with html content
|
:param html_content: string with html content
|
||||||
:param render_anchor_tag_content: boolean flag indicating whether to extract
|
:param render_anchor_tag_content: boolean flag indicating whether to extract
|
||||||
@@ -214,8 +219,19 @@ def html_to_text(html_content: str, render_anchor_tag_content=False) -> str:
|
|||||||
else:
|
else:
|
||||||
parser_config = None
|
parser_config = None
|
||||||
|
|
||||||
# get text and annotations via inscriptis
|
|
||||||
text_content = get_text(html_content, config=parser_config)
|
def parse_function(html_content, parser_config, results_queue):
|
||||||
|
from inscriptis import get_text
|
||||||
|
# get text and annotations via inscriptis
|
||||||
|
text_content = get_text(html_content, config=parser_config)
|
||||||
|
results_queue.put(text_content)
|
||||||
|
|
||||||
|
results_queue = multiprocessing.Queue()
|
||||||
|
parse_process = multiprocessing.Process(target=parse_function, args=(html_content, parser_config, results_queue))
|
||||||
|
parse_process.daemon = True
|
||||||
|
parse_process.start()
|
||||||
|
text_content = results_queue.get() # blocks until results are available
|
||||||
|
parse_process.terminate()
|
||||||
|
|
||||||
return text_content
|
return text_content
|
||||||
|
|
||||||
|
|||||||
@@ -57,6 +57,7 @@
|
|||||||
</br>
|
</br>
|
||||||
{% if is_html_webdriver %}
|
{% if is_html_webdriver %}
|
||||||
{% if screenshot %}
|
{% if screenshot %}
|
||||||
|
<div class="snapshot-age">{{watch.snapshot_screenshot_ctime|format_timestamp_timeago}}</div>
|
||||||
<img style="max-width: 80%" id="screenshot-img" alt="Current screenshot from most recent request"/>
|
<img style="max-width: 80%" id="screenshot-img" alt="Current screenshot from most recent request"/>
|
||||||
{% else %}
|
{% else %}
|
||||||
No screenshot available just yet! Try rechecking the page.
|
No screenshot available just yet! Try rechecking the page.
|
||||||
|
|||||||
Reference in New Issue
Block a user