diff --git a/.github/workflows/test-only.yml b/.github/workflows/test-only.yml
index 04958b7e..246d254c 100644
--- a/.github/workflows/test-only.yml
+++ b/.github/workflows/test-only.yml
@@ -44,10 +44,60 @@ jobs:
exit 1
fi
+  lint-template-i18n:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+      - name: Check for fragmented gettext calls in templates
+        run: |
+          python3 << 'PYEOF'
+          import re, sys
+          from pathlib import Path
+
+          # Detects two {{ _(...) }} calls on the same line that have at least one HTML tag
+          # or non-translating Jinja2 expression (plus optional whitespace) between them —
+          # the anti-pattern of splitting a single sentence across multiple msgids.
+          # See https://github.com/dgtlmoon/changedetection.io/issues/4074 for background.
+          #
+          # The correct fix is to consolidate fragments into one entire-sentence msgid, injecting
+          # dynamic values via %(name)s kwargs — per the GNU gettext manual sections "Entire sentences"
+          # and "No string concatenation". See PR #4076 for worked examples of each pattern.
+          #
+          # BASELINE: this limit reflects pre-existing violations present when this check
+          # was introduced. It must only ever go DOWN. Each time you fix a template, lower
+          # the limit by the number of lines fixed so the improvement is locked in.
+          # When the count reaches 0, replace the baseline check with a hard sys.exit(1).
+          BASELINE_LIMIT = 44
+
+          FRAGMENT_RE = re.compile(
+              r'\{\{[^{}]*\b_\s*\([^)]*\)[^{}]*\}\}'
+              r'(?:\s*(?:<[^>]+>|\{\{(?![^}]*\b_\s*\()[^}]*\}\})\s*)+'
+              r'\{\{[^{}]*\b_\s*\([^)]*\)[^{}]*\}\}'
+          )
+
+          violations = []
+          for f in sorted(Path('changedetectionio').rglob('*.html')):
+              # Decode explicitly as UTF-8 so the result never depends on the runner's locale.
+              for lineno, line in enumerate(f.read_text(encoding='utf-8').splitlines(), 1):
+                  if FRAGMENT_RE.search(line):
+                      violations.append(f"{f}:{lineno}: {line.strip()[:120]}")
+
+          count = len(violations)
+          print(f"Fragmented i18n calls found: {count} (limit: {BASELINE_LIMIT})")
+          for v in violations:
+              print(v)
+
+          if count > BASELINE_LIMIT:
+              print(f"\nERROR: {count} fragmented gettext calls exceed the baseline of {BASELINE_LIMIT}.")
+              print("Consolidate adjacent _() calls into a single entire-sentence msgid.")
+              print("See https://github.com/dgtlmoon/changedetection.io/issues/4074 for patterns.")
+              sys.exit(1)
+          PYEOF
+
test-application-3-10:
# Only run on push to master (including PR merges)
if: github.event_name == 'push' && github.ref == 'refs/heads/master'
- needs: [lint-code, lint-translations]
+ needs: [lint-code, lint-translations, lint-template-i18n]
uses: ./.github/workflows/test-stack-reusable-workflow.yml
with:
python-version: '3.10'
@@ -55,7 +105,7 @@ jobs:
test-application-3-11:
# Always run
- needs: [lint-code, lint-translations]
+ needs: [lint-code, lint-translations, lint-template-i18n]
uses: ./.github/workflows/test-stack-reusable-workflow.yml
with:
python-version: '3.11'
@@ -63,7 +113,7 @@ jobs:
test-application-3-12:
# Only run on push to master (including PR merges)
if: github.event_name == 'push' && github.ref == 'refs/heads/master'
- needs: [lint-code, lint-translations]
+ needs: [lint-code, lint-translations, lint-template-i18n]
uses: ./.github/workflows/test-stack-reusable-workflow.yml
with:
python-version: '3.12'
@@ -72,7 +122,7 @@ jobs:
test-application-3-13:
# Only run on push to master (including PR merges)
if: github.event_name == 'push' && github.ref == 'refs/heads/master'
- needs: [lint-code, lint-translations]
+ needs: [lint-code, lint-translations, lint-template-i18n]
uses: ./.github/workflows/test-stack-reusable-workflow.yml
with:
python-version: '3.13'
@@ -81,7 +131,7 @@ jobs:
test-application-3-14:
#if: github.event_name == 'push' && github.ref == 'refs/heads/master'
- needs: [lint-code, lint-translations]
+ needs: [lint-code, lint-translations, lint-template-i18n]
uses: ./.github/workflows/test-stack-reusable-workflow.yml
with:
python-version: '3.14'
diff --git a/README.md b/README.md
index 4eedf4e9..db10fe57 100644
--- a/README.md
+++ b/README.md
@@ -352,4 +352,6 @@ changedetectionio.html_tools.elementpath_tostring: Copyright (c), 2018-2021, SIS
Recognition of fantastic contributors to the project
+Developer note: see [translation guide](changedetectionio/translations/README.md) for i18n template patterns and workflow.
+
- Constantin Hong https://github.com/Constantin1489
diff --git a/changedetectionio/translations/README.md b/changedetectionio/translations/README.md
index 3deb8583..35bfa2d4 100644
--- a/changedetectionio/translations/README.md
+++ b/changedetectionio/translations/README.md
@@ -1,103 +1,231 @@
-# Translation Guide
+# Translators Guide
-## Updating Translations
+This document is for contributors who write templates (HTML) and for translators who maintain `.po` files.
+It exists because fragmented `msgid`s — splitting a single sentence across multiple `_()` calls — cause
+systematic translation breakage across many languages. Follow the patterns here to prevent that.
-To maintain consistency and minimize unnecessary changes in translation files, run these commands:
+---
-```bash
-python setup.py extract_messages # Extract translatable strings
-python setup.py update_catalog # Update all language files
-python setup.py compile_catalog # Compile to binary .mo files
+## Terminology
+
+- **Always use "monitor" or "watcher"** for the concept of watching a URL — never the bare word "watch",
+ which is easily mistranslated as a timepiece (e.g. `hodinky` in Czech, `시계` in Korean, `時計` in Japanese).
+- Use the **shortest suitable wording** for each language. If a language naturally uses the English
+ derivative, prefer that.
+
+---
+
+## Template rules: do not fragment `msgid`s
+
+### Why fragments break translation
+
+The GNU gettext manual is explicit on this:
+
+> **[Entire sentences](https://www.gnu.org/software/gettext/manual/html_node/Entire-sentences.html)**:
+> Translatable strings should be entire sentences. Because gender/number declension depends on other
+> parts of the sentence, half-sentence *"dumb string concatenation"* breaks in many languages other than English.
+
+> **[No string concatenation](https://www.gnu.org/software/gettext/manual/html_node/No-string-concatenation.html)**:
+> Placing adjacent `_()` calls is semantically equivalent to runtime `strcat` concatenation, so the same
+> guideline applies. The manual also notes that "in some languages the translator might want to swap the
+> order" of components.
+
+> **[No embedded URLs](https://www.gnu.org/software/gettext/manual/html_node/No-embedded-URLs.html)**:
+> URLs should not be written directly inside `msgid`s; they should be injected via `%(name)s` placeholders
+> and values passed as kwargs.
+
+> **[No unusual markup](https://www.gnu.org/software/gettext/manual/html_node/No-unusual-markup.html)**:
+> "HTML markup, however, is common enough that it's probably ok to use in translatable strings."
+
+Fragments break differently depending on language family:
+
+| Language family | How fragmentation breaks it |
+|---|---|
+| SOV (Japanese, Korean, Turkish) | Verb-final word order can't be achieved when verb and subject are in separate fragments |
+| Germanic (German) | Gender/case agreement between article and noun is lost across fragment boundaries |
+| Romance (French, Spanish, Italian, Portuguese) | Adjective placement, subjunctive mood, verb agreement can't be maintained |
+| Slavic (Czech, Ukrainian) | Case (driven by preposition/verb relationships) is easy to get wrong |
+| CJK (Chinese, Japanese, Korean) | Modifier position and SVO-vs-topic-prominent differences can't be applied at fragment level |
+
+A past workaround was redistributing translations across adjacent fragments and using `msgstr " "` (a
+single space) to suppress unused fragments. This is fragile: as soon as the same short `msgid` is reused
+in a different template, the redistributed translation is applied verbatim and breaks the new context.
+
+---
+
+## The four correct patterns
+
+### Pattern 1 — Inline HTML embedding
+
+Keep markup **inside** the `msgid`. Render with `| safe`. This also lets CJK translators decide how to
+handle `<i>` (see CJK section below).
+
+```jinja
+{# BAD: three fragments; CJK translators can't see the <i> at all #}
+{{ _('Helps reduce changes detected caused by sites shuffling lines around, combine with') }}
+<i>{{ _('check unique lines') }}</i>
+{{ _('below.') }}
+
+{# GOOD: one msgid, rendered with |safe #}
+{{ _('Helps reduce changes detected caused by sites shuffling lines around, combine with <i>check unique lines</i> below.') | safe }}
```
-## Configuration
+### Pattern 2 — URL as kwarg
-All translation settings are configured in **`../../setup.cfg`** (single source of truth).
+Pass URLs via `%(name)s` so translators can freely reorder them.
-The configuration below is shown for reference - **edit `setup.cfg` to change settings**:
+```jinja
+{# BAD: URL hardcoded between three fragments #}
+{{ _('Use') }}
+<a href="https://github.com/caronc/apprise">{{ _('AppRise Notification URLs') }}</a>
+{{ _('for notification to just about any service!') }}
+
+{# GOOD: URL passed as kwarg, embedded in the msgid #}
+{{ _('Use <a href="%(url)s">AppRise Notification URLs</a> for notification to just about any service!',
+ url='https://github.com/caronc/apprise') | safe }}
+```
+
+### Pattern 3 — Literal `{{}}` escape as kwarg
+
+Jinja2 would double-interpolate `{{token}}` inside a `_()` call. Pass it as a kwarg instead.
+
+```jinja
+{# BAD: literal {{token}} in the middle forces splitting #}
+{{ _('Accepts the') }} <code>{{ '{{token}}' }}</code> {{ _('placeholders listed below') }}
+
+{# GOOD: literal passed as kwarg; msgid stays as an entire sentence #}
+{{ _('Accepts the <code>%(token)s</code> placeholders listed below', token='{{token}}') | safe }}
+```
+
+### Pattern 4 — `{% if %}` outside the `msgid`
+
+Move conditional branches outside `_()` so each branch is a complete sentence, not a fragment.
+
+```jinja
+{# BAD: three fragments; SOV languages can't reorder %(title)s relative to "URL or Title" #}
+{{ _('URL or Title') }}{% if active_tag_uuid %} {{ _('in') }} '{{ active_tag.title }}'{% endif %}
+
+{# GOOD: branch between two complete msgids; each language can freely reorder %(title)s #}
+{% if active_tag_uuid %}
+ {{ _("URL or Title in '%(title)s'", title=active_tag.title) }}
+{% else %}
+ {{ _('URL or Title') }}
+{% endif %}
+```
+
+---
+
+## CJK italic policy
+
+CJK fonts typically have no true italic cut — `<i>` falls back to a mechanical slant that reduces
+legibility. Now that `<i>` is inside `msgid`s, CJK translators can handle it per-locale. Apply this policy
+for `ja` / `zh` / `zh_Hant_TW`:
+
+| Context | Action |
+|---|---|
+| `<i>` used for general emphasis | Replace with `<strong>`, or drop if the emphasis is self-evident |
+| `<strong><i>...</i></strong>` | Collapse to `<strong>...</strong>` |
+| `<i>` wrapping a UI term (e.g. "check unique lines") | Wrap in locale-conventional quotation marks: 「」 for `ja`/`zh_Hant_TW`, `“”` for `zh` |
+
+---
+
+## Translation workflow
+
+**Always use these commands** — they read consistent settings from `setup.cfg` and produce minimal diffs:
+
+```bash
+python setup.py extract_messages # Extract translatable strings from source
+python setup.py update_catalog # Propagate new msgids to all .po files
+python setup.py compile_catalog # Compile .po files to binary .mo files
+```
+
+Running `pybabel` directly without the project options causes reordering, rewrapping, and line-number
+churn that makes diffs hard to review.
+
+### Configuration
+
+All translation settings are in `setup.cfg` (single source of truth):
```ini
[extract_messages]
-# Extract translatable strings from source code
mapping_file = babel.cfg
output_file = changedetectionio/translations/messages.pot
input_paths = changedetectionio
keywords = _ _l gettext
-# Options to reduce unnecessary changes in .pot files
sort_by_file = true # Keeps entries ordered by file path
width = 120 # Consistent line width (prevents rewrapping)
add_location = file # Show file path only (not line numbers)
[update_catalog]
-# Update existing .po files with new strings from .pot
-# Note: 'locale' is omitted - Babel auto-discovers all catalogs in output_dir
input_file = changedetectionio/translations/messages.pot
output_dir = changedetectionio/translations
domain = messages
-# Options for consistent formatting
-width = 120 # Consistent line width
+width = 120
no_fuzzy_matching = true # Avoids incorrect automatic matches
[compile_catalog]
-# Compile .po files to .mo binary format
directory = changedetectionio/translations
domain = messages
```
-**Key formatting options:**
-- `sort_by_file = true` - Orders entries by file path (consistent ordering)
-- `width = 120` - Fixed line width prevents text rewrapping
-- `add_location = file` - Shows file path only, not line numbers (reduces git churn)
-- `no_fuzzy_matching = true` - Prevents incorrect automatic fuzzy matches
+---
-## Why Use These Commands?
+## Multi-language fix process
-Running pybabel commands directly without consistent options causes:
-- ❌ Entries get reordered differently each time
-- ❌ Text gets rewrapped at different widths
-- ❌ Line numbers change every edit (if not configured)
-- ❌ Large diffs that make code review difficult
+When you find a translation error in **any** language, you must check all others for the same `msgid`:
-Using `python setup.py` commands ensures:
-- ✅ Consistent ordering (by file path, not alphabetically)
-- ✅ Consistent line width (120 characters, no rewrapping)
-- ✅ File-only locations (no line number churn)
-- ✅ No fuzzy matching (prevents incorrect auto-translations)
-- ✅ Minimal diffs (only actual changes show up)
-- ✅ Easier code review and git history
+```bash
+for lang in cs de en_GB en_US es fr it ja ko pt_BR tr uk zh zh_Hant_TW; do
+ echo "=== $lang ===" && grep -A1 'msgid "YourString"' changedetectionio/translations/$lang/LC_MESSAGES/messages.po
+done
+```
-These commands read settings from `../../setup.cfg` automatically.
+1. Identify every language with the same problem
+2. Fix all affected `.po` files in the same session
+3. Recompile: `python setup.py compile_catalog`
-## Supported Languages
+Never fix one language and move on.
-- `cs` - Czech (Čeština)
-- `de` - German (Deutsch)
-- `en_GB` - English (UK)
-- `en_US` - English (US)
-- `fr` - French (Français)
-- `it` - Italian (Italiano)
-- `ja` - Japanese (日本語)
-- `ko` - Korean (한국어)
-- `pt_BR` - Portuguese (Brasil)
-- `zh` - Chinese Simplified (中文简体)
-- `zh_Hant_TW` - Chinese Traditional (繁體中文)
+---
-## Adding a New Language
+## Supported languages
-1. Initialize the new language catalog:
- ```bash
- pybabel init -i changedetectionio/translations/messages.pot -d changedetectionio/translations -l NEW_LANG_CODE
- ```
-2. Compile it:
- ```bash
- python setup.py compile_catalog
- ```
+| Code | Language |
+|---|---|
+| `cs` | Czech (Čeština) |
+| `de` | German (Deutsch) |
+| `en_GB` | English (UK) |
+| `en_US` | English (US) |
+| `es` | Spanish (Español) |
+| `fr` | French (Français) |
+| `it` | Italian (Italiano) |
+| `ja` | Japanese (日本語) |
+| `ko` | Korean (한국어) |
+| `pt_BR` | Portuguese (Brasil) |
+| `tr` | Turkish (Türkçe) |
+| `uk` | Ukrainian (Українська) |
+| `zh` | Chinese Simplified (中文简体) |
+| `zh_Hant_TW` | Chinese Traditional (繁體中文) |
-Babel will auto-discover the new language on subsequent translation updates.
+## Adding a new language
-## Translation Notes
+```bash
+pybabel init -i changedetectionio/translations/messages.pot \
+ -d changedetectionio/translations \
+ -l NEW_LANG_CODE
+python setup.py compile_catalog
+```
-From CLAUDE.md:
-- Always use "monitor" or "watcher" terminology (not "clock")
-- Use the most brief wording suitable
-- When finding issues in one language, check ALL languages for the same issue
+Babel auto-discovers the new language on subsequent runs.
+
+---
+
+## CI linter
+
+A GitHub Actions job (`lint-template-i18n`) flags lines where adjacent `{{ _(...) }}` calls are
+separated only by HTML tags or non-translating Jinja2 expressions — the primary symptom of fragmented
+`msgid`s. It enforces a declining baseline: the count of existing violations may only go down, never up.
+When you fix a template, lower `BASELINE_LIMIT` in `.github/workflows/test-only.yml` by the number of lines fixed.
+
+See [issue #4074](https://github.com/dgtlmoon/changedetection.io/issues/4074) for full background and
+[PR #4076](https://github.com/dgtlmoon/changedetection.io/pull/4076) for worked consolidation examples.