-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsplit_yaml.py
More file actions
64 lines (49 loc) · 2.14 KB
/
split_yaml.py
File metadata and controls
64 lines (49 loc) · 2.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/env python3
"""
Split and optimize data structures YAML file.
Separates data_flow_graph into its own file and optimizes node format.
"""
import yaml
from pathlib import Path
def split_and_optimize_yaml():
"""Split data_structures_v2.yaml into optimized files."""
# Load the original file
input_path = Path('output_structures/data_structures_v2.yaml')
with open(input_path, 'r') as f:
data = yaml.safe_load(f)
# Extract data_flow_graph
data_flow_graph = data.pop('data_flow_graph', None)
# Optimize nodes format
if data_flow_graph and 'nodes' in data_flow_graph:
optimized_nodes = {}
for node_id, node_data in data_flow_graph['nodes'].items():
# Simplified node format
optimized_nodes[node_id] = {
'name': node_data['name'],
'module': node_data['module'],
'in_deg': node_data['in_degree'],
'out_deg': node_data['out_degree'],
'hub': node_data['is_hub'],
'types': node_data['data_types']
}
data_flow_graph['nodes'] = optimized_nodes
# Save main data structures (without data_flow_graph)
main_output_path = Path('output_structures/data_structures_main.yaml')
with open(main_output_path, 'w') as f:
yaml.dump(data, f, default_flow_style=False, sort_keys=False)
# Save data flow graph separately
if data_flow_graph:
flow_output_path = Path('output_structures/data_flow_graph.yaml')
with open(flow_output_path, 'w') as f:
yaml.dump(data_flow_graph, f, default_flow_style=False, sort_keys=False)
print(f"✓ Split complete!")
print(f" - Main data: {main_output_path}")
print(f" - Flow graph: {flow_output_path}")
# Show optimization results
if data_flow_graph and 'nodes' in data_flow_graph:
print(f" - Nodes optimized: {len(data_flow_graph['nodes'])}")
print(f" - Original format: verbose")
print(f" - New format: compact")
return main_output_path, flow_output_path
if __name__ == '__main__':
split_and_optimize_yaml()