diff --git a/skills/graphify/.graphify_version b/skills/graphify/.graphify_version index f7abe27..d39a225 100644 --- a/skills/graphify/.graphify_version +++ b/skills/graphify/.graphify_version @@ -1 +1 @@ -0.4.2 \ No newline at end of file +0.4.23 \ No newline at end of file diff --git a/skills/graphify/SKILL.md b/skills/graphify/SKILL.md index c9fdb85..9bbaa0a 100644 --- a/skills/graphify/SKILL.md +++ b/skills/graphify/SKILL.md @@ -1,6 +1,6 @@ --- name: graphify -description: any input (code, docs, papers, images) → knowledge graph → clustered communities → HTML + JSON + audit report +description: "any input (code, docs, papers, images) - knowledge graph - clustered communities - HTML + JSON + audit report" trigger: /graphify --- @@ -185,7 +185,7 @@ for f in detect.get('files', {}).get('code', []): code_files.extend(collect_files(Path(f)) if Path(f).is_dir() else [Path(f)]) if code_files: - result = extract(code_files) + result = extract(code_files, cache_root=Path('.')) Path('graphify-out/.graphify_ast.json').write_text(json.dumps(result, indent=2)) print(f'AST: {len(result[\"nodes\"])} nodes, {len(result[\"edges\"])} edges') else: @@ -299,8 +299,10 @@ confidence_score is REQUIRED on every edge - never omit it, never use 0.5 as a d Weak or speculative: 0.4-0.5. Most edges should be 0.6-0.9, not 0.5. - AMBIGUOUS edges: 0.1-0.3 +Node ID format: lowercase, only `[a-z0-9_]`, no dots or slashes. Format: `{stem}_{entity}` where stem is the filename without extension and entity is the symbol name, both normalized (lowercase, non-alphanumeric chars replaced with `_`). Example: `src/auth/session.py` + `ValidateToken` → `session_validatetoken`. This must match the ID the AST extractor generates so cross-references between code and semantic nodes connect correctly. + Output exactly this JSON (no other text): -{"nodes":[{"id":"filestem_entityname","label":"Human Readable Name","file_type":"code|document|paper|image","source_file":"relative/path","source_location":null,"source_url":null,"captured_at":null,"author":null,"contributor":null}],"edges":[{"source":"node_id","target":"node_id","relation":"calls|implements|references|cites|conceptually_related_to|shares_data_with|semantically_similar_to|rationale_for","confidence":"EXTRACTED|INFERRED|AMBIGUOUS","confidence_score":1.0,"source_file":"relative/path","source_location":null,"weight":1.0}],"hyperedges":[{"id":"snake_case_id","label":"Human Readable Label","nodes":["node_id1","node_id2","node_id3"],"relation":"participate_in|implement|form","confidence":"EXTRACTED|INFERRED","confidence_score":0.75,"source_file":"relative/path"}],"input_tokens":0,"output_tokens":0} +{"nodes":[{"id":"session_validatetoken","label":"Human Readable Name","file_type":"code|document|paper|image","source_file":"relative/path","source_location":null,"source_url":null,"captured_at":null,"author":null,"contributor":null}],"edges":[{"source":"node_id","target":"node_id","relation":"calls|implements|references|cites|conceptually_related_to|shares_data_with|semantically_similar_to|rationale_for","confidence":"EXTRACTED|INFERRED|AMBIGUOUS","confidence_score":1.0,"source_file":"relative/path","source_location":null,"weight":1.0}],"hyperedges":[{"id":"snake_case_id","label":"Human Readable Label","nodes":["node_id1","node_id2","node_id3"],"relation":"participate_in|implement|form","confidence":"EXTRACTED|INFERRED","confidence_score":0.75,"source_file":"relative/path"}],"input_tokens":0,"output_tokens":0} ``` **Step B3 - Collect, cache, and merge** @@ -548,6 +550,36 @@ else: " ``` +### Step 6b - Wiki (only if --wiki flag) + +**Only run this step if `--wiki` was explicitly given in the original command.** + +Run this before Step 9 (cleanup) so `.graphify_labels.json` is still available. + +```bash +$(cat graphify-out/.graphify_python) -c " +import json +from graphify.build import build_from_json +from graphify.wiki import to_wiki +from graphify.analyze import god_nodes +from pathlib import Path + +extraction = json.loads(Path('graphify-out/.graphify_extract.json').read_text()) +analysis = json.loads(Path('graphify-out/.graphify_analysis.json').read_text()) +labels_raw = json.loads(Path('graphify-out/.graphify_labels.json').read_text()) if Path('graphify-out/.graphify_labels.json').exists() else {} + +G = build_from_json(extraction) +communities = {int(k): v for k, v in analysis['communities'].items()} +cohesion = {int(k): v for k, v in analysis['cohesion'].items()} +labels = {int(k): v for k, v in labels_raw.items()} +gods = god_nodes(G) + +n = to_wiki(G, communities, 'graphify-out/wiki', community_labels=labels or None, cohesion=cohesion, god_nodes_data=gods) +print(f'Wiki: {n} articles written to graphify-out/wiki/') +print(' graphify-out/wiki/index.md -> agent entry point') +" +``` + ### Step 7 - Neo4j export (only if --neo4j or --neo4j-push flag) **If `--neo4j`** - generate a Cypher file for manual import: @@ -830,6 +862,17 @@ if deleted: # Merge: new nodes/edges into existing graph G_existing.update(G_new) print(f'Merged: {G_existing.number_of_nodes()} nodes, {G_existing.number_of_edges()} edges') + +# Write merged result back to .graphify_extract.json so Step 4 sees the full graph +merged_out = { + 'nodes': [{'id': n, **d} for n, d in G_existing.nodes(data=True)], + 'edges': [{'source': u, 'target': v, **d} for u, v, d in G_existing.edges(data=True)], + 'hyperedges': new_extraction.get('hyperedges', []), + 'input_tokens': new_extraction.get('input_tokens', 0), + 'output_tokens': new_extraction.get('output_tokens', 0), +} +Path('graphify-out/.graphify_extract.json').write_text(json.dumps(merged_out)) +print(f'[graphify update] Merged extraction written ({len(merged_out[\"nodes\"])} nodes, {len(merged_out[\"edges\"])} edges)') " ```