From ae1f218d3e8bd85b5e15b291300dfb9faa0aa604 Mon Sep 17 00:00:00 2001
From: Bastien Chanot
Date: Sat, 27 Jun 2026 00:38:18 +0200
Subject: [PATCH] feat(lib): surgical doc-commit helper + real-exec scope/exclusion tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New lib/doc-commit.sh: surgical commit of ONLY the public-doc files
doc-sync patched (passed as args), twin of memory-commit.sh with INVERSE
scope. Δ1 dynamic pathspec filtered to changed paths (LRN-051); Δ2
fail-closed + LOUD scope guard rejecting .claude/** and CLAUDE.md
(BDR-022) with a dedicated exit 4; Δ3 no hash anchoring (LRN-052); Δ4
`docs:` message. Hash-only on stdout for `doc_hash=$(...)` capture.

lib/tests/run-doc-commit.sh: 24 assertions, all REALLY EXECUTED on real
git fixtures (no presumed behavior). T1a/b/c prove the guard CATCHES —
forbidden path alone → exit 4, mixed legit+forbidden → refuse-all
(nothing half-committed, offender named); T2 dynamic pathspec no-match
filter; T3/T4 dangling + stale-index safety; T5/T6 idempotent +
unsafe-state skip. shellcheck clean (both).

Part of the doc-sync coupled chantier (twin of BDR-034). Include + 2
orchestrator reorders follow.

Co-Authored-By: Claude Opus 4.8 (1M context)
Claude-Session: https://claude.ai/code/session_01Ho5EQCFTSvYamuRtVZpp2d
---
 lib/doc-commit.sh           | 147 ++++++++++++++++++++++++++++++++
 lib/tests/run-doc-commit.sh | 164 ++++++++++++++++++++++++++++++++++++
 2 files changed, 311 insertions(+)
 create mode 100755 lib/doc-commit.sh
 create mode 100755 lib/tests/run-doc-commit.sh

diff --git a/lib/doc-commit.sh b/lib/doc-commit.sh
new file mode 100755
index 0000000..61772a9
--- /dev/null
+++ b/lib/doc-commit.sh
@@ -0,0 +1,147 @@
+#!/usr/bin/env bash
+# doc-commit.sh — surgically commit ONLY the PUBLIC-DOC files doc-sync patched.
+#
+# Twin of memory-commit.sh, INVERSE scope: memory-commit TARGETS .claude/; this
+# one commits public docs and must NEVER touch .claude/ or CLAUDE.md (BDR-022).
+# Safety lives in the (dynamic) PATHSPEC + a fail-closed scope guard, never in a
+# human diff review — automation removes that review, so the scope must be airtight.
+#
+# The scope guard is fail-CLOSED and LOUD: a forbidden path (.claude/** or
+# CLAUDE.md) in the list is an UPSTREAM bug — doc-syncer must never patch those
+# (BDR-022). Seeing one, ABORT THE WHOLE COMMIT and signal; do NOT silently filter
+# it and commit the rest. A half-commit with no alert would MASK the violation.
+# Caller passes EXACTLY the files doc-sync patched this run.
+#
+# Usage (CLI):
+#   doc-commit.sh pending <file>...          # exit 0 if any passed file has changes, 1 if clean
+#   doc-commit.sh commit "<msg>" <file>...   # surgical commit
+#
+# Exit codes (commit): 0 ok/no-op · 2 usage · 3 unsafe git state · 4 scope violation.
+# Output contract: diagnostics → stderr; on a real commit the short hash of the doc
+# commit is the ONLY thing on stdout (empty on no-op/abort), so callers can capture
+# it: doc_hash=$(doc-commit.sh commit "msg" README.md USAGE.md).
+#
+# Sourceable: docs_pending and commit_docs for the v2 hook.
+
+set -uo pipefail
+
+# True (0) when git can locate a repository from the current directory.
+_in_git_repo() { git rev-parse --git-dir >/dev/null 2>&1; }
+
+# True (0) when we must NOT auto-commit: git dir not found, detached HEAD, or a
+# merge/rebase/cherry-pick/revert in progress. False (1) when committing is safe.
+_unsafe_state() {
+  local gitdir marker
+  gitdir="$(git rev-parse --git-dir 2>/dev/null)" || return 0 # no git dir → fail closed
+  for marker in MERGE_HEAD rebase-merge rebase-apply CHERRY_PICK_HEAD REVERT_HEAD; do
+    [ -e "$gitdir/$marker" ] && return 0 # an operation left its state file behind
+  done
+  git symbolic-ref -q HEAD >/dev/null 2>&1 || return 0 # detached HEAD
+  return 1
+}
+
+# True (0) when a path is OUT OF SCOPE for a doc commit: anything under .claude/
+# (any depth) or a CLAUDE.md (root or nested). These are doc-syncer's read-only
+# context, never sync targets (BDR-022) — their presence is an upstream anomaly.
+_forbidden_path() {
+  case "$1" in
+  # Bare `.claude` dirs (root or nested) are refused too: `git add` recurses into them.
+  .claude | */.claude | .claude/* | */.claude/* | CLAUDE.md | */CLAUDE.md) return 0 ;;
+  *) return 1 ;;
+  esac
+}
+
+# Print every forbidden path in the argument list, one per line (empty = none).
+_scope_violations() {
+  local p
+  for p in "$@"; do
+    _forbidden_path "$p" && printf '%s\n' "$p"
+  done
+}
+
+# Of the passed paths, those that EXIST and have real pending changes. A clean or
+# absent path is dropped (not fatal): `git commit -- <clean-path>` aborts the WHOLE
+# commit, while `git add` tolerates it — so scope = only paths git would accept.
+_changed_paths() {
+  local p
+  for p in "$@"; do
+    [ -e "$p" ] || continue
+    [ -n "$(git status --porcelain -- "$p" 2>/dev/null)" ] && printf '%s\n' "$p"
+  done
+}
+
+# 0 if any passed path has pending changes, 1 if all clean / absent (or when not
+# inside a git repository).
+docs_pending() {
+  _in_git_repo || return 1
+  local changed
+  mapfile -t changed < <(_changed_paths "$@")
+  [ "${#changed[@]}" -gt 0 ]
+}
+
+# Surgical commit of the passed doc paths only: $1 = commit message, rest = paths.
+# Returns 0 (ok/no-op), 2 (no message), 3 (unsafe), 4 (scope violation), 5 (git commit
+# failed). The new commit's short hash goes to stdout ONLY after a successful commit.
+commit_docs() {
+  # Explicit check instead of ${1:?}: that form would kill a shell that SOURCED us.
+  [ -n "${1:-}" ] || { echo "doc-commit: commit message required" >&2; return 2; }
+  local msg="$1" violations changed
+  shift
+  _in_git_repo || { echo "doc-commit: not a git repo — skip" >&2; return 3; }
+  if _unsafe_state; then
+    echo "doc-commit: detached HEAD or merge/rebase in progress — skip (no commit)" >&2
+    return 3
+  fi
+  # FAIL-CLOSED scope guard. A forbidden path is an upstream BDR-022 violation
+  # (doc-syncer must never patch .claude/ or CLAUDE.md). Abort the WHOLE commit and
+  # name the offenders — never filter-and-commit-the-rest (that masks the bug).
+  mapfile -t violations < <(_scope_violations "$@")
+  if [ "${#violations[@]}" -gt 0 ]; then
+    {
+      echo "doc-commit: REFUSED — out-of-scope path(s) in the doc list (upstream BDR-022 violation):"
+      printf ' - %s\n' "${violations[@]}"
+      echo "doc-commit: NOTHING committed. doc-syncer must never patch .claude/ or CLAUDE.md —" \
+        "investigate why these surfaced before retrying."
+    } >&2
+    return 4
+  fi
+  mapfile -t changed < <(_changed_paths "$@")
+  if [ "${#changed[@]}" -eq 0 ]; then
+    echo "doc-commit: nothing pending — no-op" >&2
+    return 0
+  fi
+  # Re-stage working-tree content over any stale index entry, then commit ONLY
+  # those paths. The pathspec on `git commit` makes it partial: other staged files
+  # (dangling code) are not recorded.
+  git add -- "${changed[@]}"
+  if git diff --cached --quiet -- "${changed[@]}"; then
+    echo "doc-commit: only ignored/no-op changes — no-op" >&2
+    return 0
+  fi
+  # stdout is reserved for the hash: send hook chatter to stderr, and bail out on a
+  # failed commit — otherwise HEAD (the PREVIOUS commit) would be reported as ours.
+  if ! git commit -q -m "$msg" -- "${changed[@]}" >&2; then
+    echo "doc-commit: git commit failed — NOTHING committed" >&2
+    return 5
+  fi
+  printf 'doc-commit: committed %d file(s): %s\n' "${#changed[@]}" "${changed[*]}" >&2
+  git rev-parse --short HEAD
+}
+
+# CLI dispatcher: `pending <file>...` or `commit "<msg>" <file>...`; anything else
+# prints usage to stderr and returns 2. The sub-command's status is returned as-is.
+main() {
+  case "${1:-}" in
+  pending) shift; docs_pending "$@" ;;
+  commit) shift; commit_docs "$@" ;;
+  *)
+    echo 'usage: doc-commit.sh {pending <file>... | commit "<msg>" <file>...}' >&2
+    return 2
+    ;;
+  esac
+}
+
+# Run main only when executed, not when sourced.
+if [ "${BASH_SOURCE[0]}" = "${0}" ]; then
+  main "$@"
+fi
diff --git a/lib/tests/run-doc-commit.sh b/lib/tests/run-doc-commit.sh
new file mode 100755
index 0000000..3942c68
--- /dev/null
+++ b/lib/tests/run-doc-commit.sh
@@ -0,0 +1,164 @@
+#!/usr/bin/env bash
+# Deterministic tests for lib/doc-commit.sh.
+#
+# Proves the contract on REAL git behavior (not assumed).
+# Load-bearing deltas vs memory-commit, each tested by what it must REFUSE, not only what it accepts:
+#   T1 inverse-exclusion scope guard (BDR-022) — fail-CLOSED and LOUD:
+#     T1a forbidden path ALONE (.claude/ and CLAUDE.md) → exit 4, nothing committed
+#     T1b legit docs only → commits cleanly
+#     T1c MIXED legit + forbidden → exit 4, NOTHING committed (the trap)
+#   T2 dynamic pathspec — a clean passed path is filtered, commit does NOT abort
+#   T3 dangling code (untracked OR pre-staged) never embarked
+#   T4 stale-staged doc (version A) → commit carries working-tree version B
+#   T5 idempotent — empty list / clean tree → no-op exit 0
+#   T6 unsafe git state (detached HEAD) → exit 3, no commit
+#
+# No -e: run every test and report, even after a failure.
+set -uo pipefail
+
+HERE="$(cd -P "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+HELPER="$HERE/../doc-commit.sh"
+ERRFILE="$(mktemp)"
+trap 'rm -f "$ERRFILE"' EXIT # also cleans up when the run is interrupted
+PASS=0 FAIL=0
+
+# ok/ko <label>: print a green ✓ / red ✗ line and bump the PASS / FAIL counter.
+ok() { printf ' \033[32m✓\033[0m %s\n' "$1"; PASS=$((PASS + 1)); }
+ko() { printf ' \033[31m✗\033[0m %s\n' "$1"; FAIL=$((FAIL + 1)); }
+
+# Fresh throwaway repo: public docs + forbidden context + code, all tracked.
+new_repo() {
+  local d
+  d="$(mktemp -d)"
+  git -C "$d" init -q
+  git -C "$d" config user.email t@t.t
+  git -C "$d" config user.name tester
+  mkdir -p "$d/.claude/memory" "$d/src"
+  printf 'readme baseline\n' >"$d/README.md"
+  printf 'usage baseline\n' >"$d/USAGE.md"
+  printf 'deploy baseline\n' >"$d/DEPLOY.md"
+  printf 'claude-md baseline\n' >"$d/CLAUDE.md"
+  printf 'decisions baseline\n' >"$d/.claude/memory/decisions.md"
+  printf 'src baseline\n' >"$d/src/app.txt"
+  git -C "$d" add -A
+  git -C "$d" commit -qm baseline
+  printf '%s' "$d"
+}
+
+# Files recorded in HEAD, sorted (stable compare).
+head_files() { git -C "$1" diff-tree --no-commit-id --name-only -r HEAD | sort | tr '\n' ' '; }
+
+# run <repo> <helper-args...> → sets RC (exit), OUT (stdout = hash), ERR (stderr = diag).
+run() {
+  local r="$1"; shift
+  OUT="$( (cd "$r" && "$HELPER" "$@") 2>"$ERRFILE" )"; RC=$?
+  ERR="$(cat "$ERRFILE")"
+}
+
+echo "T1a — forbidden path ALONE → REFUSE loud (exit 4), nothing committed"
+R="$(new_repo)"
+BEFORE="$(git -C "$R" rev-parse HEAD)"
+printf 'dirtied\n' >>"$R/.claude/memory/decisions.md"
+run "$R" commit "docs: T1a-claude" ".claude/memory/decisions.md"
+printf ' rc=%s out=[%s]\n' "$RC" "$OUT"
+printf ' err: %s\n' "$(printf '%s' "$ERR" | head -1)"
+if [ "$RC" -eq 4 ]; then ok ".claude/ alone → exit 4"; else ko "expected 4, got $RC"; fi
+if [ "$(git -C "$R" rev-parse HEAD)" = "$BEFORE" ]; then ok "no commit created"; else ko "a commit was created"; fi
+if [ -z "$OUT" ]; then ok "stdout empty (no hash)"; else ko "stdout leaked: [$OUT]"; fi
+if printf '%s' "$ERR" | grep -qi 'REFUSED'; then ok "stderr is loud (REFUSED)"; else ko "stderr not loud"; fi
+printf 'dirtied\n' >>"$R/CLAUDE.md"
+run "$R" commit "docs: T1a-claudemd" "CLAUDE.md"
+printf ' [CLAUDE.md] rc=%s out=[%s]\n' "$RC" "$OUT"
+if [ "$RC" -eq 4 ]; then ok "CLAUDE.md alone → exit 4"; else ko "expected 4, got $RC"; fi
+rm -rf "$R"
+
+echo "T1b — legit docs only → commits cleanly"
+R="$(new_repo)"
+printf 'feature added\n' >>"$R/README.md"
+printf 'cmd changed\n' >>"$R/USAGE.md"
+run "$R" commit "docs: T1b update README + USAGE" "README.md" "USAGE.md"
+COMMITTED="$(head_files "$R")"
+printf ' rc=%s out(hash)=[%s]\n' "$RC" "$OUT"
+printf ' committed: [%s]\n' "$COMMITTED"
+if [ "$RC" -eq 0 ]; then ok "exit 0"; else ko "expected 0, got $RC"; fi
+if [ "$COMMITTED" = "README.md USAGE.md " ]; then ok "committed exactly README + USAGE"; else ko "got [$COMMITTED]"; fi
+if [ -n "$OUT" ]; then ok "hash on stdout"; else ko "no hash printed"; fi
+if [ -z "$(git -C "$R" status --porcelain -- .claude CLAUDE.md)" ]; then ok ".claude/CLAUDE.md untouched"; else ko "forbidden paths touched"; fi
+rm -rf "$R"
+
+echo "T1c — MIXED legit + forbidden → exit 4, NOTHING committed (the trap)"
+R="$(new_repo)"
+BEFORE="$(git -C "$R" rev-parse HEAD)"
+printf 'feature added\n' >>"$R/README.md"
+printf 'dirtied\n' >>"$R/.claude/memory/decisions.md"
+run "$R" commit "docs: T1c mixed" "README.md" ".claude/memory/decisions.md"
+printf ' rc=%s out=[%s]\n' "$RC" "$OUT"
+printf ' err: %s\n' "$(printf '%s' "$ERR" | grep -i decisions | head -1)"
+if [ "$RC" -eq 4 ]; then ok "mixed → exit 4"; else ko "expected 4, got $RC"; fi
+if [ "$(git -C "$R" rev-parse HEAD)" = "$BEFORE" ]; then ok "NOTHING committed (README not half-committed)"; else ko "a commit slipped through"; fi
+if printf '%s' "$ERR" | grep -qF '.claude/memory/decisions.md'; then ok "stderr names the offender"; else ko "offender not named"; fi
+if git -C "$R" status --porcelain | grep -q ' M README.md'; then ok "README left dirty (not embarked)"; else ko "README state wrong"; fi
+rm -rf "$R"
+
+echo "T2 — dynamic pathspec: clean passed path filtered, no abort"
+R="$(new_repo)"
+printf 'feature added\n' >>"$R/README.md"
+printf 'cmd changed\n' >>"$R/USAGE.md"
+# DEPLOY.md passed but NOT modified → must be filtered, must not abort the commit.
+run "$R" commit "docs: T2" "README.md" "USAGE.md" "DEPLOY.md"
+COMMITTED="$(head_files "$R")"
+printf ' rc=%s committed=[%s]\n' "$RC" "$COMMITTED"
+if [ "$RC" -eq 0 ]; then ok "exit 0 (clean DEPLOY.md did not abort)"; else ko "expected 0, got $RC"; fi
+if [ "$COMMITTED" = "README.md USAGE.md " ]; then ok "committed README + USAGE only (DEPLOY filtered)"; else ko "got [$COMMITTED]"; fi
+rm -rf "$R"
+
+echo "T3 — dangling code (untracked + pre-staged) NOT embarked"
+R="$(new_repo)"
+printf 'feature added\n' >>"$R/README.md"
+printf 'untracked junk\n' >"$R/src/dangling.txt"
+printf 'staged junk\n' >"$R/src/staged.txt"; git -C "$R" add src/staged.txt
+run "$R" commit "docs: T3" "README.md"
+COMMITTED="$(head_files "$R")"
+STATUS="$(git -C "$R" status --porcelain)"
+printf ' committed=[%s]\n' "$COMMITTED"
+if [ "$COMMITTED" = "README.md " ]; then ok "only README committed"; else ko "got [$COMMITTED]"; fi
+if printf '%s\n' "$STATUS" | grep -q '^?? src/dangling.txt$'; then ok "untracked code left untracked"; else ko "untracked code embarked"; fi
+# Porcelain v1 is `XY PATH`: a staged add is X=A, Y=blank, then the separator → TWO spaces.
+if printf '%s\n' "$STATUS" | grep -q '^A  src/staged.txt$'; then ok "pre-staged code stays staged"; else ko "pre-staged code embarked"; fi
+rm -rf "$R"
+
+echo "T4 — stale-staged doc (A) → commit carries working-tree (B)"
+R="$(new_repo)"
+printf 'VERSION A\n' >>"$R/README.md"; git -C "$R" add README.md # stage A
+printf 'VERSION B\n' >>"$R/README.md" # working tree = A+B
+run "$R" commit "docs: T4" "README.md"
+HEADCONTENT="$(git -C "$R" show HEAD:README.md)"
+printf ' HEAD README tail: %s\n' "$(printf '%s' "$HEADCONTENT" | tail -1)"
+if printf '%s\n' "$HEADCONTENT" | grep -q 'VERSION B'; then ok "commit contains working-tree B (re-stage neutralized stale index)"; else ko "stale index A leaked"; fi
+rm -rf "$R"
+
+echo "T5 — idempotent: empty list / clean tree → no-op exit 0"
+R="$(new_repo)"
+BEFORE="$(git -C "$R" rev-parse HEAD)"
+run "$R" commit "docs: T5 empty" # no files at all
+printf ' [no files] rc=%s err=%s\n' "$RC" "$(printf '%s' "$ERR" | head -1)"
+if [ "$RC" -eq 0 ]; then ok "empty list → exit 0"; else ko "expected 0, got $RC"; fi
+run "$R" commit "docs: T5 clean" "README.md" # passed but clean
+printf ' [clean README] rc=%s\n' "$RC"
+if [ "$RC" -eq 0 ]; then ok "clean path → exit 0"; else ko "expected 0, got $RC"; fi
+if [ "$(git -C "$R" rev-parse HEAD)" = "$BEFORE" ]; then ok "no commit created"; else ko "a commit was created"; fi
+rm -rf "$R"
+
+echo "T6 — unsafe state (detached HEAD) → exit 3, no commit"
+R="$(new_repo)"
+git -C "$R" checkout --detach -q
+BEFORE="$(git -C "$R" rev-parse HEAD)"
+printf 'feature added\n' >>"$R/README.md"
+run "$R" commit "docs: T6" "README.md"
+printf ' rc=%s err=%s\n' "$RC" "$(printf '%s' "$ERR" | head -1)"
+if [ "$RC" -eq 3 ]; then ok "detached HEAD → exit 3"; else ko "expected 3, got $RC"; fi
+if [ "$(git -C "$R" rev-parse HEAD)" = "$BEFORE" ]; then ok "no commit created"; else ko "a commit was created"; fi
+rm -rf "$R"
+
+printf '\nRESULT: %d passed, %d failed\n' "$PASS" "$FAIL"
+[ "$FAIL" -eq 0 ]