From 864612ff7b3299bb5914abf3d6f580f7c3bf1230 Mon Sep 17 00:00:00 2001
From: bastien
Date: Fri, 8 May 2026 20:23:14 +0200
Subject: [PATCH] fix(client-handover): kill PDF text superposition (URL dupe + list page-break)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two distinct render bugs producing overlapping text on multi-page PDFs:

1. Bare-URL duplication. The print stylesheet injects `(href)` after
   every external link via `a[href^="http"]::after`. When pandoc/marked
   auto-links a bare URL or renders `[X](X)`, the visible text already
   equals the href, so the pseudo-element produces "URL (URL)" and the
   trailing duplicate wraps onto the next line, colliding with the
   following block (e.g. "https://pagespeed.web.dev/ (https://...)"
   then "• Ouvrir, taper l'URL...").

   Fix: post-process the body HTML in handover-to-pdf.sh; tag every
   `<a href="X">X</a>` (text == href, ignoring trailing slash + case)
   with `class="bare-url"`, and exclude `a.bare-url::after` from the
   URL-injection rule. Named links still get `(URL)` for print
   legibility. Belt-and-braces: add `white-space: nowrap` and
   `break-inside: avoid` on the remaining `::after` so future long URLs
   cannot wrap across page boundaries either.

2. List item splitting across page boundary. `li` had only
   `orphans/widows: 3` and no `break-inside`, so a long item could put
   its bullet on page N and its text on page N+1, overlapping unrelated
   content. Heading-to-first-block adjacency was also unprotected, so
   "heading at bottom of page A / intro paragraph or first bullet at top
   of page B" could produce visual overlap during reflow.

   Fix: add `li { page-break-inside: avoid; break-inside: avoid; }` and
   `h{1..4} + p|ul|ol { break-before: avoid; }` so list items stay
   intact and intros stay glued to their heading.
Verified end-to-end: rendered sample md with bare URL + named link +
heading-followed-by-list straddling a page break; pdftotext shows each
URL once, no orphaned bullets, no `::after` warning from weasyprint.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 .../resources/branding/zenquality.css         | 30 ++++++++++++
 .../scripts/handover-to-pdf.sh                | 48 +++++++++++++++++++
 2 files changed, 78 insertions(+)

diff --git a/skills/client-handover/resources/branding/zenquality.css b/skills/client-handover/resources/branding/zenquality.css
index 2f9e7c7..2d9f978 100644
--- a/skills/client-handover/resources/branding/zenquality.css
+++ b/skills/client-handover/resources/branding/zenquality.css
@@ -248,6 +248,25 @@ ul li, ol li { margin: 0 0 1.5mm 0; }
 ul li::marker { color: var(--green-moss); }
 ol li::marker { color: var(--green-moss); font-weight: 600; }
 
+/* ============ PAGE-BREAK HARDENING ============ */
+/* Keep each list item intact across pages — prevents the bullet/marker
+   from staying on the previous page while the text reflows to the next
+   (historical cause of "trailing word + leading bullet" superposition). */
+li {
+  page-break-inside: avoid;
+  break-inside: avoid;
+}
+/* Tie the first block after a heading to the heading itself so a page
+   break never splits "heading + intro" or "heading + first list item"
+   across two pages. */
+h1 + p, h1 + ul, h1 + ol,
+h2 + p, h2 + ul, h2 + ol,
+h3 + p, h3 + ul, h3 + ol,
+h4 + p, h4 + ul, h4 + ol {
+  page-break-before: avoid;
+  break-before: avoid;
+}
+
 strong { color: var(--green-dark); font-weight: 600; }
 em { color: var(--green-forest); font-style: italic; }
 
@@ -430,6 +449,17 @@ hr {
   a[href^="#"]::after,
   a[href^="mailto:"]::after,
   a[href^="tel:"]::after,
+  a.bare-url::after,
   .cover a::after,
   table a::after { content: ""; }
+  /* Belt-and-braces: prevent the ::after URL pseudo-element from breaking
+     across pages or columns and overlapping the next block (root cause of
+     historical "text superposition" bugs on long URLs). */
+  a[href^="http"]::after {
+    white-space: nowrap;
+    page-break-before: avoid;
+    page-break-inside: avoid;
+    break-before: avoid;
+    break-inside: avoid;
+  }
 }
diff --git a/skills/client-handover/scripts/handover-to-pdf.sh b/skills/client-handover/scripts/handover-to-pdf.sh
index a32db61..37ec249 100755
--- a/skills/client-handover/scripts/handover-to-pdf.sh
+++ b/skills/client-handover/scripts/handover-to-pdf.sh
@@ -146,6 +146,54 @@ print(markdown.markdown(
 
 BODY_HTML="$(md_to_html_body "$SRC_MD")"
 
+# Tag anchors whose visible text equals their href (auto-linked bare URLs)
+# so the print stylesheet skips the "(href)" pseudo-element duplication.
+# Without this, "[https://x.com/](https://x.com/)" or a bare URL renders as
+# "https://x.com/ (https://x.com/)" and the trailing duplicate wraps onto
+# the next line, overlapping the following block.
+tag_bare_url_links() {
+  # Pass HTML via env var so the heredoc can be the python script.
+  HQ_RAW_HTML="$1" python3 <<'PY'
+import os, sys, re, html as html_lib
+
+src = os.environ.get("HQ_RAW_HTML", "")
+
+def normalize(u: str) -> str:
+    return html_lib.unescape(u).strip().rstrip('/').lower()
+
+# Match <a ... href="X" ...>TEXT</a> with no nested tags inside the anchor.
+ANCHOR_RE = re.compile(
+    r'<a\b([^>]*?)\bhref="([^"]+)"([^>]*)>([^<]*)</a>',
+    flags=re.IGNORECASE,
+)
+
+def repl(m: re.Match) -> str:
+    pre_attrs, href, post_attrs, text = m.groups()
+    if normalize(href) != normalize(text):
+        return m.group(0)
+    attrs = (pre_attrs or "") + (post_attrs or "")
+    class_re = re.compile(r'\bclass="([^"]*)"', flags=re.IGNORECASE)
+    cm = class_re.search(attrs)
+    if cm:
+        existing = cm.group(1)
+        if "bare-url" in existing.split():
+            new_attrs = attrs
+        else:
+            new_attrs = class_re.sub(
+                f'class="{existing} bare-url"', attrs, count=1
+            )
+    else:
+        new_attrs = attrs.rstrip() + ' class="bare-url"'
+    return f'<a href="{href}"{new_attrs}>{text}</a>'
+
+sys.stdout.write(ANCHOR_RE.sub(repl, src))
+PY
+}
+
+if command -v python3 >/dev/null 2>&1 && TAGGED_HTML="$(tag_bare_url_links "$BODY_HTML")"; then
+  BODY_HTML="${TAGGED_HTML:-$BODY_HTML}"  # tagging is cosmetic: on failure or empty output keep the untagged body
+fi
+
 # ---------------------------- WRAP HTML ----------------------------
 
 CSS_CONTENT="$(cat "$BRANDING_DIR/zenquality.css")"