fix(client-handover): kill PDF text superposition (URL dupe + list page-break)
Two distinct render bugs producing overlapping text on multi-page PDFs:

1. Bare-URL duplication. The print stylesheet injects `(href)` after every external link via `a[href^="http"]::after`. When pandoc/marked auto-links a bare URL or renders `[X](X)`, the visible text already equals the href, so the pseudo-element produces "URL (URL)" and the trailing duplicate wraps onto the next line, colliding with the following block (e.g. "https://pagespeed.web.dev/ (https://...)" then "• Ouvrir, taper l'URL...").

   Fix: post-process the body HTML in handover-to-pdf.sh; tag every `<a href="X">X</a>` (text == href, ignoring trailing slash + case) with `class="bare-url"`, and exclude `a.bare-url::after` from the URL-injection rule. Named links still get `(URL)` for print legibility. Belt-and-braces: add `white-space: nowrap` and `break-inside: avoid` on the remaining `::after` so future long URLs cannot wrap across page boundaries either.

2. List item splitting across page boundary. `li` had only `orphans/widows: 3` and no `break-inside`, so a long item could put its bullet on page N and its text on page N+1, overlapping unrelated content. Heading-to-first-block adjacency was also unprotected, so "heading at bottom of page A / intro paragraph or first bullet at top of page B" could produce visual overlap during reflow.

   Fix: add `li { page-break-inside: avoid; break-inside: avoid; }` and `h{1..4} + p|ul|ol { break-before: avoid; }` so list items stay intact and intros stay glued to their heading.

Verified end-to-end: rendered sample md with bare URL + named link + heading-followed-by-list straddling a page break; pdftotext shows each URL once, no orphaned bullets, no `::after` warning from weasyprint.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
17ef213548
commit
864612ff7b
@ -248,6 +248,25 @@ ul li, ol li { margin: 0 0 1.5mm 0; }
|
|||||||
ul li::marker { color: var(--green-moss); }
|
ul li::marker { color: var(--green-moss); }
|
||||||
ol li::marker { color: var(--green-moss); font-weight: 600; }
|
ol li::marker { color: var(--green-moss); font-weight: 600; }
|
||||||
|
|
||||||
|
/* ============ PAGE-BREAK HARDENING ============ */
|
||||||
|
/* A list item must never be fragmented across pages: splitting one lets the
   bullet/marker stay at the bottom of a page while its text reflows to the
   next (historical cause of the "trailing word + leading bullet"
   superposition). Both the current property and its older page-break-*
   spelling are declared. */
li { break-inside: avoid; page-break-inside: avoid; }
|
||||||
|
/* Glue the block that directly follows a heading (paragraph or list) to
   that heading, so a page break can never fall between "heading + intro"
   or "heading + first list item". Selectors are grouped by the kind of
   following block. */
h1 + p,  h2 + p,  h3 + p,  h4 + p,
h1 + ul, h2 + ul, h3 + ul, h4 + ul,
h1 + ol, h2 + ol, h3 + ol, h4 + ol {
  break-before: avoid;
  page-break-before: avoid;
}
|
||||||
|
|
||||||
strong { color: var(--green-dark); font-weight: 600; }
|
strong { color: var(--green-dark); font-weight: 600; }
|
||||||
|
|
||||||
em { color: var(--green-forest); font-style: italic; }
|
em { color: var(--green-forest); font-style: italic; }
|
||||||
@ -430,6 +449,17 @@ hr {
|
|||||||
a[href^="#"]::after,
|
a[href^="#"]::after,
|
||||||
a[href^="mailto:"]::after,
|
a[href^="mailto:"]::after,
|
||||||
a[href^="tel:"]::after,
|
a[href^="tel:"]::after,
|
||||||
|
a.bare-url::after,
|
||||||
.cover a::after,
|
.cover a::after,
|
||||||
table a::after { content: ""; }
|
table a::after { content: ""; }
|
||||||
|
/* Belt-and-braces for the "(href)" pseudo-element appended to external
   links: keep it on a single line and ask the renderer not to fragment it,
   so a long URL cannot straddle a page/column boundary and overlap the next
   block (root cause of the historical "text superposition" bugs).
   NOTE(review): if this ::after box is inline (no display override is
   visible in this hunk), the break-* declarations probably do not apply to
   it and white-space: nowrap is the one doing the work; a URL wider than
   the text column would then overflow instead of wrapping. Confirm against
   a long-URL sample. */
a[href^="http"]::after {
  white-space: nowrap;
  break-before: avoid;
  break-inside: avoid;
  page-break-before: avoid;
  page-break-inside: avoid;
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -146,6 +146,54 @@ print(markdown.markdown(
|
|||||||
|
|
||||||
BODY_HTML="$(md_to_html_body "$SRC_MD")"
|
BODY_HTML="$(md_to_html_body "$SRC_MD")"
|
||||||
|
|
||||||
|
# Tag anchors whose visible text equals their href (auto-linked bare URLs)
|
||||||
|
# so the print stylesheet skips the "(href)" pseudo-element duplication.
|
||||||
|
# Without this, "[https://x.com/](https://x.com/)" or a bare URL renders as
|
||||||
|
# "https://x.com/ (https://x.com/)" and the trailing duplicate wraps onto
|
||||||
|
# the next line, overlapping the following block.
|
||||||
|
tag_bare_url_links() {
  # Usage: tag_bare_url_links HTML
  # Prints HTML on stdout with class="bare-url" added to every <a> whose
  # visible text equals its href (compared after HTML-unescaping, trimming,
  # dropping trailing slashes and lower-casing). Anchors with a different
  # label or with nested tags are emitted unchanged. Exit status is python3's.
  #
  # The HTML travels on stdin and the Python program on fd 3. Passing the
  # document through an environment variable (or argv) is subject to the
  # kernel per-string exec limit, which makes python3 fail to start on large
  # documents. printf is a shell builtin, so it is not affected.
  printf '%s' "$1" | python3 /dev/fd/3 3<<'PY'
import html as html_lib
import re
import sys

MARKER = "bare-url"

# <a ... href="X" ...>TEXT</a> with no nested tags inside the anchor.
# Whitespace is required right before "href" so look-alike attributes such
# as data-href are never mistaken for the link target.
ANCHOR_RE = re.compile(
    r'<a\b([^>]*?\s)href="([^"]+)"([^>]*)>([^<]*)</a>',
    flags=re.IGNORECASE,
)

# A real class attribute, not the tail of data-class / aria-class.
CLASS_RE = re.compile(r'(?<![\w-])class="([^"]*)"', flags=re.IGNORECASE)


def normalize(url: str) -> str:
    """Return the canonical form used to compare link text with its href."""
    return html_lib.unescape(url).strip().rstrip("/").lower()


def add_marker(match: re.Match) -> str:
    """Return the matched class attribute with MARKER present exactly once."""
    names = match.group(1).split()
    if MARKER not in names:
        names.append(MARKER)
    return 'class="' + " ".join(names) + '"'


def repl(match: re.Match) -> str:
    """Rebuild one anchor, tagging it when its text merely repeats its href."""
    pre_attrs, href, post_attrs, text = match.groups()
    if normalize(href) != normalize(text):
        return match.group(0)
    attrs = (pre_attrs + post_attrs).rstrip()
    if CLASS_RE.search(attrs):
        # Callable replacement: the existing class value is inserted as-is
        # and never parsed as a regex replacement template.
        attrs = CLASS_RE.sub(add_marker, attrs, count=1)
    else:
        attrs += ' class="' + MARKER + '"'
    return f'<a{attrs} href="{href}">{text}</a>'


# Fixed UTF-8 on both sides keeps the output independent of the locale;
# surrogateescape round-trips any stray undecodable byte unchanged.
raw = sys.stdin.buffer.read().decode("utf-8", errors="surrogateescape")
tagged = ANCHOR_RE.sub(repl, raw)
sys.stdout.buffer.write(tagged.encode("utf-8", errors="surrogateescape"))
PY
}
|
||||||
|
|
||||||
|
# Bare-URL tagging is a best-effort post-processing step. Only adopt its
# output when the tagger exits successfully; if it fails, keep the untagged
# HTML instead of replacing the document body with the (empty) output of a
# failed command substitution.
if command -v python3 >/dev/null 2>&1; then
  if TAGGED_BODY_HTML="$(tag_bare_url_links "$BODY_HTML")"; then
    BODY_HTML="$TAGGED_BODY_HTML"
  else
    echo "warning: bare-URL tagging failed; continuing with untagged HTML" >&2
  fi
fi
|
||||||
|
|
||||||
# ---------------------------- WRAP HTML ----------------------------
|
# ---------------------------- WRAP HTML ----------------------------
|
||||||
|
|
||||||
CSS_CONTENT="$(cat "$BRANDING_DIR/zenquality.css")"
|
CSS_CONTENT="$(cat "$BRANDING_DIR/zenquality.css")"
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user