forked from jgoldin-skillz/confluenceDumpWithPython
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_transform.py
More file actions
118 lines (98 loc) · 4.07 KB
/
test_transform.py
File metadata and controls
118 lines (98 loc) · 4.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Test script for the transform phase (offline HTML generation).

Reads from raw-data/ and generates HTML into pages/.

Usage:
    python test_transform.py "./output/2026-03-16 1400 Test"
"""
import sys
from pathlib import Path
from confluence_dump.transform.html_processor import HTMLProcessor
from confluence_dump.transform.sidebar_builder import SidebarBuilder
import json
def _load_page_metadata(raw_data_dir):
    """Collect page metadata from manifest.json, or by scanning raw-data/.

    If ``raw_data_dir / "manifest.json"`` exists, its ``pages`` mapping is
    used. Otherwise every all-digit subdirectory containing a ``meta.json``
    is read, and the last entry of its ``ancestors`` list (if any) is taken
    as the parent.

    Args:
        raw_data_dir: Path of the raw-data/ directory inside the workspace.

    Returns:
        A ``(all_pages_metadata, page_ids)`` tuple: a list of dicts with the
        keys ``id``, ``title`` and ``parent_id``, and the set of page ids.
    """
    manifest_path = raw_data_dir / "manifest.json"
    all_pages_metadata = []
    page_ids = set()

    if manifest_path.exists():
        print("📋 Lade Manifest...")
        manifest = json.loads(manifest_path.read_text(encoding='utf-8'))
        for pid, data in manifest['pages'].items():
            all_pages_metadata.append({
                'id': pid,
                'title': data['title'],
                'parent_id': data.get('parent_id')
            })
            page_ids.add(pid)
        print(f" ✅ {len(page_ids)} Seiten im Manifest\n")
        return all_pages_metadata, page_ids

    print("⚠️ Kein Manifest gefunden. Scanne raw-data/...")
    for page_dir in raw_data_dir.iterdir():
        # Only all-digit directory names are treated as page directories.
        if not (page_dir.is_dir() and page_dir.name.isdigit()):
            continue
        meta_path = page_dir / "meta.json"
        if not meta_path.exists():
            continue
        meta = json.loads(meta_path.read_text(encoding='utf-8'))
        page_ids.add(meta['id'])
        ancestors = meta.get('ancestors', [])
        parent_id = ancestors[-1]['id'] if ancestors else None
        all_pages_metadata.append({
            'id': meta['id'],
            'title': meta['title'],
            'parent_id': parent_id
        })
    print(f" ✅ {len(page_ids)} Seiten gefunden\n")
    return all_pages_metadata, page_ids


def main():
    """Run the offline transform phase for the workspace given in sys.argv[1].

    Reads page data from ``<workspace>/raw-data``, builds the sidebar, runs
    every page through ``HTMLProcessor`` and writes ``<page_id>.html`` into
    ``<workspace>/pages``. Per-page failures are reported and counted but do
    not abort the run.

    Exits with status 1 when the workspace argument is missing, the
    workspace or its raw-data/ directory does not exist, or no pages are
    found.
    """
    if len(sys.argv) < 2:
        print("Usage: python test_transform.py <workspace_path>")
        print('Example: python test_transform.py "./output/2026-03-16 1400 Test"')
        sys.exit(1)

    workspace = Path(sys.argv[1])
    if not workspace.exists():
        print(f"Error: Workspace not found: {workspace}")
        sys.exit(1)

    raw_data_dir = workspace / "raw-data"
    pages_dir = workspace / "pages"
    attachments_dir = workspace / "attachments"
    if not raw_data_dir.exists():
        print("Error: raw-data/ not found in workspace")
        sys.exit(1)

    print(f"📂 Workspace: {workspace}")
    print(f"📂 Raw Data: {raw_data_dir}")
    print(f"📂 Output: {pages_dir}\n")

    # 1. Load the manifest or scan the directory
    all_pages_metadata, page_ids = _load_page_metadata(raw_data_dir)
    if not page_ids:
        print("❌ Keine Seiten gefunden!")
        sys.exit(1)

    # 2. Generate the sidebar
    print("🔨 Generiere Sidebar...")
    sidebar_builder = SidebarBuilder(pages_dir)
    sidebar_html = sidebar_builder.build_sidebar_html(all_pages_metadata, page_ids)
    print(f" ✅ Sidebar generiert ({len(sidebar_html)} Zeichen)\n")

    # 3. Process all pages
    print("🔨 Verarbeite Seiten...")
    processor = HTMLProcessor(raw_data_dir, pages_dir, attachments_dir, sidebar_html)
    css_files = ['../styles/site.css']

    # Build the id -> title lookup once instead of rescanning the metadata
    # list for every page; setdefault keeps the first entry per id.
    titles = {}
    for page in all_pages_metadata:
        titles.setdefault(page['id'], page['title'])

    success_count = 0
    error_count = 0
    first_written = None  # first page that was actually written to disk
    for page_id in sorted(page_ids):
        try:
            processed_html = processor.process_page(page_id, page_ids, css_files)
            # Save the final HTML
            output_path = pages_dir / f"{page_id}.html"
            output_path.parent.mkdir(parents=True, exist_ok=True)
            output_path.write_text(processed_html, encoding='utf-8')
            print(f" ✅ {page_id} - {titles.get(page_id, page_id)}")
            success_count += 1
            if first_written is None:
                first_written = output_path
        except Exception as e:
            # Deliberately broad: one broken page must not stop the batch.
            print(f" ❌ {page_id} - Fehler: {e}")
            error_count += 1

    print(f"\n{'='*60}")
    print(f"✅ Erfolgreich: {success_count}")
    if error_count > 0:
        print(f"❌ Fehler: {error_count}")
    print(f"{'='*60}")

    # Point at a file that really exists. Picking an arbitrary element of the
    # page_ids set could name a page that failed, and varied between runs.
    if first_written is not None:
        print(f"\n💡 Öffne: {first_written}")
# Script entry point: run the transform only when executed directly.
if __name__ == "__main__":
    main()