Merge pull request #180 from zensical/chore/linting-and-type-checking

zensical:chore - Setup Ruff and Mypy, lint code, check in CI
This commit is contained in:
Martin Donath
2025-12-12 12:08:11 +01:00
committed by GitHub
18 changed files with 577 additions and 374 deletions
+4 -4
View File
@@ -21,8 +21,8 @@
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
from .zensical import *
from zensical.zensical import * # noqa: F403
__doc__ = zensical.__doc__
if hasattr(zensical, "__all__"):
__all__ = zensical.__all__
__doc__ = zensical.__doc__ # type: ignore[name-defined] # noqa: F405
if hasattr(zensical, "__all__"): # type: ignore[name-defined] # noqa: F405
__all__ = zensical.__all__ # type: ignore[name-defined] # noqa: F405
+71 -101
View File
@@ -27,21 +27,22 @@ import hashlib
import importlib
import os
import pickle
from typing import IO, Any
from urllib.parse import urlparse
import yaml
from click import ClickException
from deepmerge import always_merger
from yaml import BaseLoader, Loader, YAMLError
from yaml.constructor import ConstructorError
from zensical.extensions.emoji import to_svg, twemoji
try:
import tomllib
except ModuleNotFoundError:
import tomli as tomllib # type: ignore
import tomli as tomllib # type: ignore[no-redef]
from click import ClickException
from deepmerge import always_merger
from typing import Any, IO
from yaml import BaseLoader, Loader, YAMLError
from yaml.constructor import ConstructorError
from urllib.parse import urlparse
from .extensions.emoji import to_svg, twemoji
# ----------------------------------------------------------------------------
# Globals
@@ -64,9 +65,7 @@ side, and use it directly when needed. It's a hack but will do for now.
class ConfigurationError(ClickException):
"""
Configuration resolution or validation failed.
"""
"""Configuration resolution or validation failed."""
# ----------------------------------------------------------------------------
@@ -75,22 +74,17 @@ class ConfigurationError(ClickException):
def parse_config(path: str) -> dict:
"""
Parse configuration file.
"""
"""Parse configuration file."""
# Decide by extension; no need to convert to Path
_, ext = os.path.splitext(path)
if ext.lower() == ".toml":
return parse_zensical_config(path)
else:
return parse_mkdocs_config(path)
return parse_mkdocs_config(path)
def parse_zensical_config(path: str) -> dict:
"""
Parse zensical.toml configuration file.
"""
global _CONFIG
"""Parse zensical.toml configuration file."""
global _CONFIG # noqa: PLW0603
with open(path, "rb") as f:
config = tomllib.load(f)
if "project" in config:
@@ -102,11 +96,9 @@ def parse_zensical_config(path: str) -> dict:
def parse_mkdocs_config(path: str) -> dict:
"""
Parse mkdocs.yml configuration file.
"""
global _CONFIG
with open(path, "r") as f:
"""Parse mkdocs.yml configuration file."""
global _CONFIG # noqa: PLW0603
with open(path) as f:
config = _yaml_load(f)
# Apply defaults and return parsed configuration
@@ -114,24 +106,20 @@ def parse_mkdocs_config(path: str) -> dict:
return _CONFIG
def get_config():
"""
Return configuration.
"""
return _CONFIG
def get_config() -> dict:
"""Return configuration."""
# We assume this function is only called after populating `_CONFIG`.
return _CONFIG # type: ignore[return-value]
def get_theme_dir() -> str:
"""
Return the theme directory.
"""
"""Return the theme directory."""
path = os.path.dirname(os.path.abspath(__file__))
return os.path.join(path, "templates")
def _apply_defaults(config: dict, path: str) -> dict:
"""
Apply default settings in configuration.
"""Apply default settings in configuration.
Note that this is loosely based on the defaults that MkDocs sets in its own
configuration system, which we won't port for compatibility right now, as
@@ -146,12 +134,12 @@ def _apply_defaults(config: dict, path: str) -> dict:
# Set site directory
set_default(config, "site_dir", "site", str)
if ".." in config.get("site_dir"):
if ".." in config.get("site_dir", ""):
raise ConfigurationError("site_dir must not contain '..'")
# Set docs directory
set_default(config, "docs_dir", "docs", str)
if ".." in config.get("docs_dir"):
if ".." in config.get("docs_dir", ""):
raise ConfigurationError("docs_dir must not contain '..'")
# Set defaults for core settings
@@ -475,9 +463,7 @@ def _apply_defaults(config: dict, path: str) -> dict:
def set_default(
entry: dict, key: str, default: Any, data_type: type | None = None
) -> Any:
"""
Set a key to a default value if it isn't set, and optionally cast it to the specified data type.
"""
"""Set a key to a default value if it isn't set, and optionally cast it to the specified data type."""
if key in entry and entry[key] is None:
del entry[key]
@@ -489,24 +475,22 @@ def set_default(
try:
entry[key] = data_type(entry[key])
except (ValueError, TypeError) as e:
raise ValueError(f"Failed to cast key '{key}' to {data_type}: {e}")
raise ValueError(
f"Failed to cast key '{key}' to {data_type}: {e}"
) from e
# Return the resulting value
return entry[key]
def _hash(data: Any) -> int:
"""
Compute a hash for the given data.
"""
hash = hashlib.sha1(pickle.dumps(data))
"""Compute a hash for the given data."""
hash = hashlib.sha1(pickle.dumps(data)) # noqa: S324
return int(hash.hexdigest(), 16) % (2**64)
def _convert_extra(data: dict | list) -> dict | list:
"""
Recursively convert all None values in a dictionary or list to empty strings.
"""
"""Recursively convert all None values in a dictionary or list to empty strings."""
if isinstance(data, dict):
# Process each key-value pair in the dictionary
return {
@@ -515,7 +499,7 @@ def _convert_extra(data: dict | list) -> dict | list:
else ("" if value is None else value)
for key, value in data.items()
}
elif isinstance(data, list):
if isinstance(data, list):
# Process each item in the list
return [
_convert_extra(item)
@@ -523,14 +507,11 @@ def _convert_extra(data: dict | list) -> dict | list:
else ("" if item is None else item)
for item in data
]
else:
return data
return data
def _resolve(symbol: str):
"""
Resolve a symbol to its corresponding Python object.
"""
def _resolve(symbol: str) -> Any:
"""Resolve a symbol to its corresponding Python object."""
module_path, func_name = symbol.rsplit(".", 1)
module = importlib.import_module(module_path)
return getattr(module, func_name)
@@ -540,16 +521,14 @@ def _resolve(symbol: str):
def _convert_nav(nav: list) -> list:
"""
Convert MkDocs navigation
"""
"""Convert MkDocs navigation."""
return [_convert_nav_item(entry) for entry in nav]
def _convert_nav_item(item: str | dict | list) -> dict | list:
"""
Convert MkDocs shorthand navigation structure into something more manageable
as we need to annotate each item with a title, URL, icon, and children.
"""Convert MkDocs shorthand navigation structure into something more manageable.
We need to annotate each item with a title, URL, icon, and children.
"""
if isinstance(item, str):
return {
@@ -563,7 +542,7 @@ def _convert_nav_item(item: str | dict | list) -> dict | list:
}
# Handle Title: URL
elif isinstance(item, dict):
if isinstance(item, dict):
for title, value in item.items():
if isinstance(value, str):
return {
@@ -575,7 +554,7 @@ def _convert_nav_item(item: str | dict | list) -> dict | list:
"is_index": _is_index(value.strip()),
"active": False,
}
elif isinstance(value, list):
if isinstance(value, list):
return {
"title": str(title),
"url": None,
@@ -585,28 +564,25 @@ def _convert_nav_item(item: str | dict | list) -> dict | list:
"is_index": False,
"active": False,
}
raise TypeError(f"Unknown nav item value type: {type(value)}")
# Handle a list of items
elif isinstance(item, list):
return [_convert_nav_item(child) for child in item]
else:
raise ValueError(f"Unknown nav item type: {type(item)}")
raise TypeError(f"Unknown nav item type: {type(item)}")
def _is_index(path: str) -> bool:
"""
Return whether the given path points to a section index.
"""
"""Return whether the given path points to a section index."""
return os.path.basename(path) in ("index.md", "README.md")
# -----------------------------------------------------------------------------
def _convert_extra_javascript(value: list[Any]) -> list:
"""
Ensure extra_javascript uses a structured format.
"""
def _convert_extra_javascript(value: list) -> list:
"""Ensure extra_javascript uses a structured format."""
for i, item in enumerate(value):
if isinstance(item, str):
value[i] = {
@@ -621,9 +597,7 @@ def _convert_extra_javascript(value: list[Any]) -> list:
item.setdefault("async", False)
item.setdefault("defer", False)
else:
raise ValueError(
f"Unknown extra_javascript item type: {type(item)}"
)
raise TypeError(f"Unknown extra_javascript item type: {type(item)}")
# Return resulting value
return value
@@ -632,12 +606,10 @@ def _convert_extra_javascript(value: list[Any]) -> list:
# -----------------------------------------------------------------------------
def _convert_markdown_extensions(value: Any):
"""
Convert Markdown extensions configuration to what Python Markdown expects.
"""
def _convert_markdown_extensions(value: Any) -> tuple[list[str], dict]:
"""Convert Markdown extensions configuration to what Python Markdown expects."""
markdown_extensions = ["toc", "tables"]
mdx_configs = {"toc": {}, "tables": {}}
mdx_configs: dict[str, dict[str, Any]] = {"toc": {}, "tables": {}}
# In case of Python Markdown Extensions, we allow omitting the necessary
# quotes around the extension names, so we need to hoist the extensions
@@ -645,24 +617,24 @@ def _convert_markdown_extensions(value: Any):
# actually parse the configuration.
if "pymdownx" in value:
pymdownx = value.pop("pymdownx")
for ext, config in pymdownx.items():
for ext, conf in pymdownx.items():
# Special case for blocks extension, which has another level of
# nesting. This is the only extension that requires this.
if ext == "blocks":
for block, config in config.items():
for block, config in conf.items():
value[f"pymdownx.{ext}.{block}"] = config
else:
value[f"pymdownx.{ext}"] = config
value[f"pymdownx.{ext}"] = conf
# Same as for Python Markdown extensions, see above
if "zensical" in value:
zensical = value.pop("zensical")
for ext, config in zensical.items():
for ext, conf in zensical.items():
if ext == "extensions":
for key, config in config.items():
for key, config in conf.items():
value[f"zensical.{ext}.{key}"] = config
else:
value[f"zensical.{ext}"] = config
value[f"zensical.{ext}"] = conf
# Extensions can be defined as a dict
if isinstance(value, dict):
@@ -688,15 +660,12 @@ def _convert_markdown_extensions(value: Any):
def _convert_plugins(value: Any, config: dict) -> dict:
"""
Convert plugins configuration to something we can work with.
"""
"""Convert plugins configuration to something we can work with."""
plugins = {}
# Plugins can be defined as a dict
if isinstance(value, dict):
for name, data in value.items():
plugins[name] = data
plugins.update(value)
# Plugins can also be defined as a list
else:
@@ -753,8 +722,7 @@ def _convert_plugins(value: Any, config: dict) -> dict:
def _yaml_load(
source: IO, loader: type[BaseLoader] | None = None
) -> dict[str, Any]:
"""
Load configuration file and resolve environment variables and parent files.
"""Load configuration file and resolve environment variables and parent files.
Note that INHERIT is only a bandaid that was introduced to allow for some
degree of modularity, but with serious shortcomings. Zensical will use a
@@ -769,12 +737,12 @@ def _yaml_load(
source.read()
.replace("material.extensions", "zensical.extensions")
.replace("materialx", "zensical.extensions"),
Loader=Loader,
Loader=Loader, # noqa: S506
)
except YAMLError as e:
raise ConfigurationError(
f"Encountered an error parsing the configuration file: {e}"
)
) from e
if config is None:
return {}
@@ -788,7 +756,7 @@ def _yaml_load(
raise ConfigurationError(
f"Inherited config file '{relpath}' doesn't exist at '{abspath}'."
)
with open(abspath, "r") as fd:
with open(abspath) as fd:
parent = _yaml_load(fd, loader)
config = always_merger.merge(parent, config)
@@ -796,9 +764,11 @@ def _yaml_load(
return config
def _construct_env_tag(loader: yaml.Loader, node: yaml.Node):
"""
Assign value of ENV variable referenced at node.
def _construct_env_tag(
loader: yaml.Loader,
node: yaml.ScalarNode | yaml.SequenceNode | yaml.MappingNode,
) -> Any:
"""Assign value of ENV variable referenced at node.
MkDocs supports the use of !ENV to reference environment variables in YAML
configuration files. We won't likely support this in Zensical, but for now
@@ -827,7 +797,7 @@ def _construct_env_tag(loader: yaml.Loader, node: yaml.Node):
else:
raise ConstructorError(
context=f"expected a scalar or sequence node, but found {node.id}",
start_mark=node.start_mark,
context_mark=node.start_mark,
)
# Resolve environment variable
+20 -25
View File
@@ -26,21 +26,22 @@ from __future__ import annotations
import codecs
import functools
import os
from glob import iglob
from markdown import Markdown
from pymdownx import emoji, twemoji_db
from typing import TYPE_CHECKING
from xml.etree.ElementTree import Element
from pymdownx import emoji, twemoji_db
if TYPE_CHECKING:
from markdown import Markdown
# -----------------------------------------------------------------------------
# Functions
# -----------------------------------------------------------------------------
def twemoji(options: object, md: Markdown):
"""
Create twemoji index.
"""
def twemoji(options: dict, md: Markdown) -> dict: # noqa: ARG001
"""Create twemoji index."""
paths = options.get("custom_icons", [])[:]
return _load_twemoji_index(tuple(paths))
@@ -53,14 +54,12 @@ def to_svg(
alt: str,
title: str,
category: str,
options: object,
options: dict,
md: Markdown,
):
"""
Load icon.
"""
) -> Element[str]:
"""Load icon."""
if not uc:
icons = md.inlinePatterns["emoji"].emoji_index["emoji"]
icons = md.inlinePatterns["emoji"].emoji_index["emoji"] # type: ignore[attr-defined]
# Create and return element to host icon
el = Element("span", {"class": options.get("classes", index)})
@@ -78,20 +77,16 @@ def to_svg(
# -----------------------------------------------------------------------------
@functools.lru_cache(maxsize=None)
def _load(file: str):
"""
Load icon from file.
"""
@functools.cache
def _load(file: str) -> str:
"""Load icon from file."""
with codecs.open(file, encoding="utf-8") as f:
return f.read()
@functools.lru_cache(maxsize=None)
def _load_twemoji_index(paths):
"""
Load twemoji index and add icons.
"""
@functools.cache
def _load_twemoji_index(paths: tuple[str, ...]) -> dict:
"""Load twemoji index and add icons."""
index = {
"name": "twemoji",
"emoji": twemoji_db.emoji,
@@ -106,8 +101,8 @@ def _load_twemoji_index(paths):
# Index icons provided by the theme and via custom icons
glob = os.path.join(base, "**", "*.svg")
glob = iglob(os.path.normpath(glob), recursive=True)
for file in glob:
svgs = iglob(os.path.normpath(glob), recursive=True)
for file in svgs:
icon = file[len(base) + 1 : -4].replace(os.path.sep, "-")
# Add icon to index
+18 -22
View File
@@ -23,12 +23,17 @@
from __future__ import annotations
from pathlib import PurePosixPath
from typing import TYPE_CHECKING
from urllib.parse import urlparse
from markdown import Extension, Markdown
from markdown.treeprocessors import Treeprocessor
from markdown.util import AMP_SUBSTITUTE
from pathlib import PurePosixPath
from xml.etree.ElementTree import Element
from urllib.parse import urlparse
if TYPE_CHECKING:
from xml.etree.ElementTree import Element
# -----------------------------------------------------------------------------
# Classes
@@ -36,8 +41,7 @@ from urllib.parse import urlparse
class LinksProcessor(Treeprocessor):
"""
Tree processor to replace links in Markdown with URLs.
"""Tree processor to replace links in Markdown with URLs.
Note that we view this as a bandaid until we can do processing on proper
HTML ASTs in Rust. In the meantime, we just replace them as we find them.
@@ -50,7 +54,7 @@ class LinksProcessor(Treeprocessor):
self.path = path # Current page
self.use_directory_urls = use_directory_urls
def run(self, root: Element):
def run(self, root: Element) -> None:
# Now, we determine whether the current page is an index page, as we
# must apply slightly different handling in case of directory URLs
current_is_index = get_name(self.path) in ("index.md", "README.md")
@@ -64,7 +68,7 @@ class LinksProcessor(Treeprocessor):
# Extract value - Python Markdown does some weird stuff where it
# replaces mailto: links with double encoded entities. MkDocs just
# skips if it detects that, so we do the same.
value = el.get(key)
value = el.get(key, "")
if AMP_SUBSTITUTE in value:
continue
@@ -101,21 +105,15 @@ class LinksProcessor(Treeprocessor):
class LinksExtension(Extension):
"""
A Markdown extension to resolve links to other Markdown files.
"""
"""A Markdown extension to resolve links to other Markdown files."""
def __init__(self, path: str, use_directory_urls: bool):
"""
Initialize the extension.
"""
"""Initialize the extension."""
self.path = path # Current page
self.use_directory_urls = use_directory_urls
def extendMarkdown(self, md: Markdown):
"""
Register Markdown extension.
"""
def extendMarkdown(self, md: Markdown) -> None: # noqa: N802
"""Register Markdown extension."""
md.registerExtension(self)
# Create and register treeprocessor - we use the same priority as the
@@ -132,8 +130,6 @@ class LinksExtension(Extension):
def get_name(path: str) -> str:
"""
Get the name of a file from a given path.
"""
path = PurePosixPath(path)
return path.name
"""Get the name of a file from a given path."""
pure_path = PurePosixPath(path)
return pure_path.name
+26 -38
View File
@@ -24,14 +24,17 @@
from __future__ import annotations
import posixpath
from typing import TYPE_CHECKING, Any
from urllib.parse import urlparse
from markdown import Extension, Markdown
from markdown.treeprocessors import Treeprocessor
from urllib.parse import urlparse
from xml.etree.ElementTree import Element
from .links import LinksProcessor
from .utilities.filter import Filter
from zensical.extensions.links import LinksProcessor
from zensical.extensions.utilities.filter import Filter
if TYPE_CHECKING:
from xml.etree.ElementTree import Element
# -----------------------------------------------------------------------------
# Classes
@@ -39,24 +42,19 @@ from .utilities.filter import Filter
class PreviewProcessor(Treeprocessor):
"""
A Markdown treeprocessor to enable instant previews on links.
"""A Markdown treeprocessor to enable instant previews on links.
Note that this treeprocessor is dependent on the `links` treeprocessor
registered programmatically before rendering a page.
"""
def __init__(self, md: Markdown, config: dict):
"""
Initialize the treeprocessor.
"""
"""Initialize the treeprocessor."""
super().__init__(md)
self.config = config
def run(self, root: Element):
"""
Run the treeprocessor.
"""
def run(self, root: Element) -> None:
"""Run the treeprocessor."""
at = self.md.treeprocessors.get_index_for_name("zrelpath")
# Hack: Python Markdown has no notion of where it is, i.e., which file
@@ -84,9 +82,10 @@ class PreviewProcessor(Treeprocessor):
# Walk through all configurations - @todo refactor so that we don't
# iterate multiple times over the same elements
for configuration in configurations:
if not configuration.get("sources"):
if not configuration.get("targets"):
continue
if not configuration.get("sources") and not configuration.get(
"targets"
):
continue
# Skip if page should not be considered
filter = get_filter(configuration, "sources")
@@ -123,8 +122,7 @@ class PreviewProcessor(Treeprocessor):
class PreviewExtension(Extension):
"""
A Markdown extension to enable instant previews on links.
"""A Markdown extension to enable instant previews on links.
This extension allows automatically adding the `data-preview` attribute to
internal links matching specific criteria, so Material for MkDocs renders a
@@ -132,10 +130,8 @@ class PreviewExtension(Extension):
add previews to links in a programmatic way.
"""
def __init__(self, *args, **kwargs):
"""
Initialize the extension.
"""
def __init__(self, *args: Any, **kwargs: Any) -> None:
"""Initialize the extension."""
self.config = {
"configurations": [[], "Filter configurations"],
"sources": [{}, "Link sources"],
@@ -143,10 +139,8 @@ class PreviewExtension(Extension):
}
super().__init__(*args, **kwargs)
def extendMarkdown(self, md: Markdown):
"""
Register Markdown extension.
"""
def extendMarkdown(self, md: Markdown) -> None: # noqa: N802
"""Register Markdown extension."""
md.registerExtension(self)
# Create and register treeprocessor - we use the same priority as the
@@ -162,17 +156,13 @@ class PreviewExtension(Extension):
# -----------------------------------------------------------------------------
def get_filter(settings: dict, key: str):
"""
Get file filter from settings.
"""
return Filter(config=settings.get(key, {})) # type: ignore
def get_filter(settings: dict, key: str) -> Filter:
"""Get file filter from settings."""
return Filter(config=settings.get(key, {}))
def resolve(processor_path: str, url_path: str) -> str:
"""
Resolve a relative URL path against the processor path.
"""
"""Resolve a relative URL path against the processor path."""
# Remove the file name from the processor path to get the directory
base_path = posixpath.dirname(processor_path)
@@ -194,8 +184,6 @@ def resolve(processor_path: str, url_path: str) -> str:
return posixpath.join(*base_segments)
def makeExtension(**kwargs):
"""
Register Markdown extension.
"""
def makeExtension(**kwargs: Any) -> PreviewExtension: # noqa: N802
"""Register Markdown extension."""
return PreviewExtension(**kwargs)
+81 -81
View File
@@ -23,8 +23,9 @@
from html import escape
from html.parser import HTMLParser
from typing import Any
from markdown import Extension
from markdown import Extension, Markdown
from markdown.postprocessors import Postprocessor
# -----------------------------------------------------------------------------
@@ -33,17 +34,14 @@ from markdown.postprocessors import Postprocessor
class SearchProcessor(Postprocessor):
"""
Post processor that extracts searchable content from the rendered HTML.
"""
"""Post processor that extracts searchable content from the rendered HTML."""
def __init__(self, md):
def __init__(self, md: Markdown) -> None:
super().__init__(md)
self.data = []
self.data: list[dict[str, Any]] = []
def run(self, html):
def run(self, html: str) -> str:
"""Process the rendered HTML and extract text length."""
# Divide page content into sections
parser = Parser()
parser.feed(html)
@@ -76,17 +74,17 @@ class SearchProcessor(Postprocessor):
class SearchExtension(Extension):
"""Markdown extension for search indexing."""
def __init__(self, **kwargs):
def __init__(self, **kwargs: Any) -> None:
self.config = {"keep": [set(), "Set of HTML tags to keep in output"]}
super().__init__(**kwargs)
def extendMarkdown(self, md):
def extendMarkdown(self, md: Markdown) -> None: # noqa: N802
"""Register the PostProcessor with Markdown."""
processor = SearchProcessor(md)
md.postprocessors.register(processor, "search", 0)
def makeExtension(**kwargs):
def makeExtension(**kwargs: Any) -> SearchExtension: # noqa: N802
"""Factory function for creating the extension."""
return SearchExtension(**kwargs)
@@ -96,13 +94,16 @@ def makeExtension(**kwargs):
# HTML element
class Element:
"""
"""HTML element.
An element with attributes, essentially a small wrapper object for the
parser to access attributes in other callbacks than handle_starttag.
"""
# Initialize HTML element
def __init__(self, tag, attrs=None):
def __init__(
self, tag: str, attrs: dict[str, str | None] | None = None
) -> None:
self.tag = tag
self.attrs = attrs or {}
@@ -111,18 +112,17 @@ class Element:
return self.tag
# Support comparison (compare by tag only)
def __eq__(self, other):
if other is Element:
def __eq__(self, other: object) -> bool:
if isinstance(other, Element):
return self.tag == other.tag
else:
return self.tag == other
return self.tag == other
# Support set operations
def __hash__(self):
return hash(self.tag)
# Check whether the element should be excluded
def is_excluded(self):
def is_excluded(self) -> bool:
return "data-search-exclude" in self.attrs
@@ -131,31 +131,31 @@ class Element:
# HTML section
class Section:
"""
"""HTML section.
A block of text with markup, preceded by a title (with markup), i.e., a
headline with a certain level (h1-h6). Internally used by the parser.
"""
# Initialize HTML section
def __init__(self, el, level, depth=0):
def __init__(self, el: Element, level: int, depth: int = 0) -> None:
self.el = el
self.depth = depth
self.depth: int | float = depth
self.level = level
# Initialize section data
self.text = []
self.title = []
self.id = None
self.text: list[str] = []
self.title: list[str] = []
self.id: str | None = None
# String representation
def __repr__(self):
if self.id:
return "#".join([self.el.tag, self.id])
else:
return self.el.tag
return f"{self.el.tag}#{self.id}"
return self.el.tag
# Check whether the section should be excluded
def is_excluded(self):
def is_excluded(self) -> bool:
return self.el.is_excluded()
@@ -164,7 +164,8 @@ class Section:
# HTML parser
class Parser(HTMLParser):
"""
"""Section divider.
This parser divides the given string of HTML into a list of sections, each
of which is preceded by an h1-h6 level heading. A white- and blacklist of
tags dictates which tags should be preserved as part of the index, and
@@ -172,31 +173,31 @@ class Parser(HTMLParser):
"""
# Initialize HTML parser
def __init__(self, *args, **kwargs):
def __init__(self, *args: Any, **kwargs: Any) -> None:
super().__init__(*args, **kwargs)
# Tags to skip
self.skip = set(
[
"object", # Objects
"script", # Scripts
"style", # Styles
]
)
self.skip: set[str | Element] = {
"object", # Objects
"script", # Scripts
"style", # Styles
}
# Current context and section
self.context = []
self.section = None
self.context: list[Element] = []
self.section: Section | None = None
# All parsed sections
self.data = []
self.data: list[Section] = []
# Called at the start of every HTML tag
def handle_starttag(self, tag, attrs):
attrs = dict(attrs)
def handle_starttag(
self, tag: str, attrs: list[tuple[str, str | None]]
) -> None:
attrs_dict = dict(attrs)
# Ignore self-closing tags
el = Element(tag, attrs)
el = Element(tag, attrs_dict)
if tag not in void:
self.context.append(el)
else:
@@ -205,7 +206,7 @@ class Parser(HTMLParser):
# Handle heading
if tag in ([f"h{x}" for x in range(1, 7)]):
depth = len(self.context)
if "id" in attrs:
if "id" in attrs_dict:
# Ensure top-level section
if tag != "h1" and not self.data:
self.section = Section(Element("hx"), 1, depth)
@@ -214,7 +215,7 @@ class Parser(HTMLParser):
# Set identifier, if not first section
self.section = Section(el, int(tag[1:2]), depth)
if self.data:
self.section.id = attrs["id"]
self.section.id = attrs_dict["id"]
# Append section to list
self.data.append(self.section)
@@ -225,7 +226,7 @@ class Parser(HTMLParser):
self.data.append(self.section)
# Handle special cases to skip
for key, value in attrs.items():
for key, value in attrs_dict.items():
# Skip block if explicitly excluded from search
if key == "data-search-exclude":
self.skip.add(el)
@@ -247,7 +248,7 @@ class Parser(HTMLParser):
data.append(f"<{tag}>")
# Called at the end of every HTML tag
def handle_endtag(self, tag):
def handle_endtag(self, tag: str) -> None:
if not self.context or self.context[-1] != tag:
return
@@ -255,6 +256,7 @@ class Parser(HTMLParser):
# a headline is nested in another element. In that case, we close the
# current section, continuing to append data to the previous section,
# which could also be a nested section - see https://bit.ly/3IxxIJZ
assert self.section is not None # noqa: S101
if self.section.depth > len(self.context):
for section in reversed(self.data):
if section.depth <= len(self.context):
@@ -295,7 +297,7 @@ class Parser(HTMLParser):
data.append(f"</{tag}>")
# Called for the text contents of each tag
def handle_data(self, data):
def handle_data(self, data: str) -> None:
if self.skip.intersection(self.context):
return
@@ -324,9 +326,11 @@ class Parser(HTMLParser):
# Collapse adjacent whitespace
elif data.isspace():
if not self.section.text or not self.section.text[-1].isspace():
self.section.text.append(data)
elif "pre" in self.context:
if (
not self.section.text
or not self.section.text[-1].isspace()
or "pre" in self.context
):
self.section.text.append(data)
# Handle everything else
@@ -339,35 +343,31 @@ class Parser(HTMLParser):
# -----------------------------------------------------------------------------
# Tags to keep
keep = set(
[
"p",
"code",
"pre",
"li",
"ol",
"ul",
"sub",
"sup",
]
)
keep = {
"p",
"code",
"pre",
"li",
"ol",
"ul",
"sub",
"sup",
}
# Tags that are self-closing
void = set(
[
"area",
"base",
"br",
"col",
"embed",
"hr",
"img",
"input",
"link",
"meta",
"param",
"source",
"track",
"wbr",
]
)
void = {
"area",
"base",
"br",
"col",
"embed",
"hr",
"img",
"input",
"link",
"meta",
"param",
"source",
"track",
"wbr",
}
@@ -31,13 +31,10 @@ from fnmatch import fnmatch
class Filter:
"""
A filter.
"""
"""A filter."""
def __init__(self, config: dict):
"""
Initialize the filter.
"""Initialize the filter.
Arguments:
config: The filter configuration.
@@ -45,8 +42,7 @@ class Filter:
self.config = config
def __call__(self, value: str) -> bool:
"""
Filter a value.
"""Filter a value.
First, the inclusion patterns are checked. Regardless of whether they
are present, the exclusion patterns are checked afterwards. This allows
@@ -59,7 +55,6 @@ class Filter:
Returns:
Whether the value should be included.
"""
# Check if value matches one of the inclusion patterns
if "include" in self.config:
for pattern in self.config["include"]:
+12 -17
View File
@@ -23,14 +23,15 @@
from __future__ import annotations
import click
import os
import shutil
from pathlib import Path
from typing import Any
import click
from click import ClickException
from zensical import build, serve, version
from zensical import build, serve, version
# ----------------------------------------------------------------------------
# Commands
@@ -39,8 +40,8 @@ from zensical import build, serve, version
@click.version_option(version=version(), message="%(version)s")
@click.group()
def cli():
"""Zensical - A modern static site generator"""
def cli() -> None:
"""Zensical - A modern static site generator."""
@cli.command(name="build")
@@ -65,10 +66,8 @@ def cli():
is_flag=True,
help="Strict mode (currently unsupported).",
)
def execute_build(config_file: str | None, **kwargs):
"""
Build a project.
"""
def execute_build(config_file: str | None, **kwargs: Any) -> None:
"""Build a project."""
if config_file is None:
for file in ["zensical.toml", "mkdocs.yml", "mkdocs.yaml"]:
if os.path.exists(file):
@@ -81,7 +80,7 @@ def execute_build(config_file: str | None, **kwargs):
# Build project in Rust runtime, calling back into Python when necessary,
# e.g., to parse MkDocs configuration format or render Markdown
build(os.path.abspath(config_file), kwargs.get("clean"))
build(os.path.abspath(config_file), kwargs.get("clean", False))
@cli.command(name="serve")
@@ -112,10 +111,8 @@ def execute_build(config_file: str | None, **kwargs):
is_flag=True,
help="Strict mode (currently unsupported).",
)
def execute_serve(config_file: str | None, **kwargs):
"""
Build and serve a project.
"""
def execute_serve(config_file: str | None, **kwargs: Any) -> None:
"""Build and serve a project."""
if config_file is None:
for file in ["zensical.toml", "mkdocs.yml", "mkdocs.yaml"]:
if os.path.exists(file):
@@ -137,10 +134,8 @@ def execute_serve(config_file: str | None, **kwargs):
type=click.Path(file_okay=False, dir_okay=True, writable=True),
required=False,
)
def new_project(directory: str | None, **kwargs):
"""
Create a new template project in the current directory or in the given
directory.
def new_project(directory: str | None, **kwargs: Any) -> None: # noqa: ARG001
"""Create a new template project in the current directory or in the given directory.
Raises:
ClickException: if the directory already contains a zensical.toml or a
+18 -17
View File
@@ -24,15 +24,19 @@
from __future__ import annotations
import re
import yaml
from datetime import date, datetime
from typing import TYPE_CHECKING, Any
import yaml
from markdown import Markdown
from yaml import SafeLoader
from .config import get_config
from .extensions.links import LinksExtension
from .extensions.search import SearchExtension
from zensical.config import get_config
from zensical.extensions.links import LinksExtension
from zensical.extensions.search import SearchExtension
if TYPE_CHECKING:
from zensical.extensions.search import SearchProcessor
# ----------------------------------------------------------------------------
# Constants
@@ -53,8 +57,7 @@ Regex pattern to extract front matter.
def render(content: str, path: str) -> dict:
"""
Render Markdown and return HTML.
"""Render Markdown and return HTML.
This function returns rendered HTML as well as the table of contents and
metadata. Now, this is the part where Zensical needs to call into Python,
@@ -77,8 +80,8 @@ def render(content: str, path: str) -> dict:
links.extendMarkdown(md)
# Register search extension, which extracts text for search indexing
search = SearchExtension()
search.extendMarkdown(md)
search_extension = SearchExtension()
search_extension.extendMarkdown(md)
# First, extract metadata - the Python Markdown parser brings a metadata
# extension, but the implementation is broken, as it does not support full
@@ -91,7 +94,7 @@ def render(content: str, path: str) -> dict:
content = content[match.end() :].lstrip("\n")
else:
meta = {}
except Exception:
except Exception: # noqa: BLE001
pass
# Convert Markdown and set nullish metadata to empty string, since we
@@ -106,24 +109,22 @@ def render(content: str, path: str) -> dict:
meta[key] = value.isoformat()
# Obtain search index data, unless page is excluded
search = md.postprocessors["search"]
search_processor: SearchProcessor = md.postprocessors["search"] # type: ignore[assignment]
if meta.get("search", {}).get("exclude", False):
search.data = []
search_processor.data = []
# Return Markdown with metadata
return {
"meta": meta,
"content": content,
"search": search.data,
"search": search_processor.data,
"title": "",
"toc": [_convert_toc(item) for item in getattr(md, "toc_tokens", [])],
}
def _convert_toc(item: any):
"""
Convert a table of contents item to navigation item format.
"""
def _convert_toc(item: Any) -> dict:
"""Convert a table of contents item to navigation item format."""
toc_item = {
"title": item["data-toc-label"] or item["name"],
"id": item["id"],
+5 -11
View File
@@ -25,20 +25,14 @@
# Functions
# ----------------------------------------------------------------------------
def build(config_file: str, clean: bool):
"""
Builds the project.
"""
def build(config_file: str, clean: bool) -> None:
"""Builds the project."""
def serve(config_file: str, dev_addr: str):
"""
Builds and serves the project.
"""
def serve(config_file: str, options: dict) -> None:
"""Builds and serves the project."""
def version() -> str:
"""
Returns the current version.
"""
"""Returns the current version."""
# ----------------------------------------------------------------------------