json_structure_parser/resparser.py at main · ThomasMartin83/json_structure_parser · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import json
import os
from collections import defaultdict
from typing import Dict, Union, Set, List, Any
import sys

def parse_json_schema(data: Any, path: str = "") -> Dict[str, Union[Set[str], dict]]:
    """
    Recursively walk parsed JSON data and build a schema of its keys.

    Args:
        data: Any value produced by ``json.load`` (dict, list or scalar).
        path: Dotted path of ``data`` inside the document; empty at the root.
            It is only used as the key under which the element types of a
            list of scalars are recorded.

    Returns:
        A dict mapping each key either to a set of Python type names (for
        scalar values) or to a nested schema dict (for dict/list values).
        Scalars, empty lists and root-level lists of scalars yield ``{}``.
    """
    if isinstance(data, dict):
        schema: Dict[str, Union[Set[str], dict]] = {}
        # Keys of a single dict are unique, so each key is seen exactly once
        # here and can be assigned directly; merging of repeated keys only
        # happens across list items, via merge_schemas below.
        for key, value in data.items():
            if isinstance(value, (dict, list)):
                new_path = f"{path}.{key}" if path else key
                schema[key] = parse_json_schema(value, new_path)
            else:
                schema[key] = {type(value).__name__}
        return schema

    if isinstance(data, list) and data:  # empty lists contribute nothing
        # The first element decides how the whole list is treated.
        if isinstance(data[0], (dict, list)):
            schema = {}
            for item in data:
                schema = merge_schemas(schema, parse_json_schema(item, path))
            return schema
        if path:
            # List of scalars: record the element type names under the
            # dotted path of the list itself.
            return {path: {type(item).__name__ for item in data}}

    return {}

def merge_schemas(
    schema1: Dict[str, Union[Set[str], dict]],
    schema2: Dict[str, Union[Set[str], dict]]
) -> Dict[str, Union[Set[str], dict]]:
    """
    Merge two JSON schemas into a new one without modifying the inputs.

    For a key present in both schemas:
      * two nested schemas are merged recursively;
      * two type sets are united;
      * a nested schema and a type set collapse into a single set holding
        the type names of both sides plus the markers ``"nested_dict"`` and
        ``"mixed_type"``.
    A key present in only one schema is carried over unchanged.

    Args:
        schema1: First schema; its keys come first in the result.
        schema2: Second schema; keys not in ``schema1`` are appended in
            their own order.

    Returns:
        The merged schema as a plain dict with a deterministic key order.
    """
    merged: Dict[str, Union[Set[str], dict]] = {}

    # Walk the dicts themselves rather than a set of their keys: set
    # iteration order depends on string hash randomization, which made the
    # order of the generated report differ between runs.
    for key, val1 in schema1.items():
        if key not in schema2:
            merged[key] = val1
            continue

        val2 = schema2[key]
        if isinstance(val1, dict) and isinstance(val2, dict):
            merged[key] = merge_schemas(val1, val2)
        elif isinstance(val1, set) and isinstance(val2, set):
            merged[key] = val1 | val2
        else:
            # One side is a nested schema, the other a set of type names.
            mixed: Set[str] = {"mixed_type"}
            for val in (val1, val2):
                if isinstance(val, dict):
                    mixed.add("nested_dict")
                    mixed.update(flatten_dict_types(val))
                else:
                    mixed.update(val)
            merged[key] = mixed

    for key, val2 in schema2.items():
        if key not in schema1:
            merged[key] = val2

    return merged

def flatten_dict_types(d: dict) -> Set[str]:
    """
    Collect every type name found anywhere inside a nested schema dict.

    Each nested dict contributes a ``"dict:<key>"`` marker for the key it is
    stored under, followed by whatever its own entries contribute; every
    non-dict entry contributes all of its elements.
    """
    collected: Set[str] = set()
    pending = [d]  # nested schemas still waiting to be scanned
    while pending:
        current = pending.pop()
        for name, entry in current.items():
            if isinstance(entry, dict):
                collected.add(f"dict:{name}")
                pending.append(entry)
            else:
                collected.update(entry)
    return collected

def save_schema_to_file(schema: dict, filename: str) -> None:
    """
    Write the schema as an indented text outline next to the source file.

    The output path is ``filename`` with its extension replaced by the
    ``-структура.txt`` suffix. Nested schemas are written as ``key:``
    headers with their content indented two spaces deeper; leaf entries are
    written as ``- key (type1, type2)`` with the type names sorted.
    """
    base_name, _extension = os.path.splitext(filename)
    output_filename = f"{base_name}-структура.txt"

    def render(node: dict, depth: int):
        # Lazily produce the report lines for one schema level.
        pad = "  " * depth
        for name, entry in node.items():
            if isinstance(entry, dict):
                yield f"{pad}{name}:\n"
                yield from render(entry, depth + 1)
            else:
                type_list = ", ".join(sorted(entry))
                yield f"{pad}- {name} ({type_list})\n"

    with open(output_filename, 'w', encoding='utf-8') as out:
        for line in render(schema, 0):
            out.write(line)
    print(f"Схема сохранена в файл: {output_filename}")

def process_json_file(filename: str) -> None:
    """
    Build and save the key schema for a single JSON file.

    Args:
        filename: Path of the JSON file to read.

    Raises:
        Exception: Any failure other than malformed JSON (missing file,
            undecodable bytes, write error, ...) is reported on stderr and
            re-raised. Malformed JSON is reported on stderr only, and the
            function then returns normally.
    """
    try:
        # "utf-8-sig" reads plain UTF-8 and also strips a leading byte order
        # mark; with plain "utf-8" json.load rejects BOM-prefixed files as
        # invalid JSON.
        with open(filename, 'r', encoding='utf-8-sig') as f:
            data = json.load(f)

        schema = parse_json_schema(data)
        save_schema_to_file(schema, filename)

    except json.JSONDecodeError as e:
        print(f"Ошибка в файле {filename}: некорректный JSON - {e}", file=sys.stderr)
    except Exception as e:
        print(f"Ошибка при обработке файла {filename}: {e}", file=sys.stderr)
        raise  # re-raise so the failure stays visible for diagnostics

def main() -> None:
    """
    Find every JSON file in the current directory and process each one.

    Only regular files whose name ends in ``.json`` (case-insensitive) are
    considered, and they are processed in sorted name order. When none are
    found, a message is printed to stderr and the function returns.
    """
    # Keep regular files only: a directory named "*.json" cannot be opened
    # and would abort the whole run. Sorting makes the order stable, since
    # os.listdir() returns entries in arbitrary order.
    json_files = sorted(
        f for f in os.listdir()
        if f.lower().endswith('.json') and os.path.isfile(f)
    )

    if not json_files:
        print("В текущей директории не найдено JSON файлов.", file=sys.stderr)
        return

    print(f"Найдено JSON файлов для обработки: {len(json_files)}")

    for json_file in json_files:
        print(f"\nОбработка файла: {json_file}")
        process_json_file(json_file)

if __name__ == "__main__":
    # Script entry point: run the batch and turn any uncaught failure into
    # a message on stderr plus exit status 1.
    try:
        main()
    except Exception as fatal_error:
        print(f"Критическая ошибка: {fatal_error}", file=sys.stderr)
        raise SystemExit(1)