|
@@ -1,6 +1,6 @@
|
|
|
---
|
|
|
name: graphify
|
|
|
-description: any input (code, docs, papers, images) → knowledge graph → clustered communities → HTML + JSON + audit report
|
|
|
|
|
|
|
+description: "any input (code, docs, papers, images) - knowledge graph - clustered communities - HTML + JSON + audit report"
|
|
|
trigger: /graphify
|
|
|
---
|
|
|
|
|
|
|
@@ -185,7 +185,7 @@ for f in detect.get('files', {}).get('code', []):
|
|
|
code_files.extend(collect_files(Path(f)) if Path(f).is_dir() else [Path(f)])
|
|
|
|
|
|
|
|
if code_files:
|
|
|
- result = extract(code_files)
|
|
|
|
|
|
|
+ result = extract(code_files, cache_root=Path('.'))
|
|
|
Path('graphify-out/.graphify_ast.json').write_text(json.dumps(result, indent=2))
|
|
|
print(f'AST: {len(result[\"nodes\"])} nodes, {len(result[\"edges\"])} edges')
|
|
|
else:
|
|
@@ -299,8 +299,10 @@ confidence_score is REQUIRED on every edge - never omit it, never use 0.5 as a d
|
|
|
Weak or speculative: 0.4-0.5. Most edges should be 0.6-0.9, not 0.5.
|
|
|
- AMBIGUOUS edges: 0.1-0.3
|
|
|
|
|
|
|
|
|
|
+Node ID format: lowercase, only `[a-z0-9_]`, no dots or slashes. Format: `{stem}_{entity}` where stem is the filename without extension and entity is the symbol name, both normalized (lowercase, non-alphanumeric chars replaced with `_`). Example: `src/auth/session.py` + `ValidateToken` → `session_validatetoken`. This must match the ID the AST extractor generates so cross-references between code and semantic nodes connect correctly.
|
|
|
|
|
+
|
|
|
Output exactly this JSON (no other text):
|
|
|
-{"nodes":[{"id":"filestem_entityname","label":"Human Readable Name","file_type":"code|document|paper|image","source_file":"relative/path","source_location":null,"source_url":null,"captured_at":null,"author":null,"contributor":null}],"edges":[{"source":"node_id","target":"node_id","relation":"calls|implements|references|cites|conceptually_related_to|shares_data_with|semantically_similar_to|rationale_for","confidence":"EXTRACTED|INFERRED|AMBIGUOUS","confidence_score":1.0,"source_file":"relative/path","source_location":null,"weight":1.0}],"hyperedges":[{"id":"snake_case_id","label":"Human Readable Label","nodes":["node_id1","node_id2","node_id3"],"relation":"participate_in|implement|form","confidence":"EXTRACTED|INFERRED","confidence_score":0.75,"source_file":"relative/path"}],"input_tokens":0,"output_tokens":0}
|
|
|
|
|
|
|
+{"nodes":[{"id":"session_validatetoken","label":"Human Readable Name","file_type":"code|document|paper|image","source_file":"relative/path","source_location":null,"source_url":null,"captured_at":null,"author":null,"contributor":null}],"edges":[{"source":"node_id","target":"node_id","relation":"calls|implements|references|cites|conceptually_related_to|shares_data_with|semantically_similar_to|rationale_for","confidence":"EXTRACTED|INFERRED|AMBIGUOUS","confidence_score":1.0,"source_file":"relative/path","source_location":null,"weight":1.0}],"hyperedges":[{"id":"snake_case_id","label":"Human Readable Label","nodes":["node_id1","node_id2","node_id3"],"relation":"participate_in|implement|form","confidence":"EXTRACTED|INFERRED","confidence_score":0.75,"source_file":"relative/path"}],"input_tokens":0,"output_tokens":0}
|
|
|
```
|
|
|
|
|
|
|
|
**Step B3 - Collect, cache, and merge**
|
|
@@ -548,6 +550,36 @@ else:
|
|
|
"
|
|
|
```
|
|
|
|
|
|
|
|
|
|
+### Step 6b - Wiki (only if --wiki flag)
|
|
|
|
|
+
|
|
|
|
|
+**Only run this step if `--wiki` was explicitly given in the original command.**
|
|
|
|
|
+
|
|
|
|
|
+Run this before Step 9 (cleanup) so `.graphify_labels.json` is still available.
|
|
|
|
|
+
|
|
|
|
|
+```bash
|
|
|
|
|
+$(cat graphify-out/.graphify_python) -c "
|
|
|
|
|
+import json
|
|
|
|
|
+from graphify.build import build_from_json
|
|
|
|
|
+from graphify.wiki import to_wiki
|
|
|
|
|
+from graphify.analyze import god_nodes
|
|
|
|
|
+from pathlib import Path
|
|
|
|
|
+
|
|
|
|
|
+extraction = json.loads(Path('graphify-out/.graphify_extract.json').read_text())
|
|
|
|
|
+analysis = json.loads(Path('graphify-out/.graphify_analysis.json').read_text())
|
|
|
|
|
+labels_raw = json.loads(Path('graphify-out/.graphify_labels.json').read_text()) if Path('graphify-out/.graphify_labels.json').exists() else {}
|
|
|
|
|
+
|
|
|
|
|
+G = build_from_json(extraction)
|
|
|
|
|
+communities = {int(k): v for k, v in analysis['communities'].items()}
|
|
|
|
|
+cohesion = {int(k): v for k, v in analysis['cohesion'].items()}
|
|
|
|
|
+labels = {int(k): v for k, v in labels_raw.items()}
|
|
|
|
|
+gods = god_nodes(G)
|
|
|
|
|
+
|
|
|
|
|
+n = to_wiki(G, communities, 'graphify-out/wiki', community_labels=labels or None, cohesion=cohesion, god_nodes_data=gods)
|
|
|
|
|
+print(f'Wiki: {n} articles written to graphify-out/wiki/')
|
|
|
|
|
+print(' graphify-out/wiki/index.md -> agent entry point')
|
|
|
|
|
+"
|
|
|
|
|
+```
|
|
|
|
|
+
|
|
|
### Step 7 - Neo4j export (only if --neo4j or --neo4j-push flag)
|
|
|
|
|
|
|
|
**If `--neo4j`** - generate a Cypher file for manual import:
|
|
@@ -830,6 +862,17 @@ if deleted:
|
|
|
# Merge: new nodes/edges into existing graph
|
|
|
G_existing.update(G_new)
|
|
|
print(f'Merged: {G_existing.number_of_nodes()} nodes, {G_existing.number_of_edges()} edges')
|
|
|
|
|
+
|
|
|
|
|
+# Write merged result back to .graphify_extract.json so Step 4 sees the full graph
|
|
|
|
|
+merged_out = {
|
|
|
|
|
+ 'nodes': [{'id': n, **d} for n, d in G_existing.nodes(data=True)],
|
|
|
|
|
+ 'edges': [{'source': u, 'target': v, **d} for u, v, d in G_existing.edges(data=True)],
|
|
|
|
|
+ 'hyperedges': new_extraction.get('hyperedges', []),
|
|
|
|
|
+ 'input_tokens': new_extraction.get('input_tokens', 0),
|
|
|
|
|
+ 'output_tokens': new_extraction.get('output_tokens', 0),
|
|
|
|
|
+}
|
|
|
|
|
+Path('graphify-out/.graphify_extract.json').write_text(json.dumps(merged_out))
|
|
|
|
|
+print(f'[graphify update] Merged extraction written ({len(merged_out[\"nodes\"])} nodes, {len(merged_out[\"edges\"])} edges)')
|
|
|
"
|
|
|
```
|
|
|
|
|
|