From 6cc97ed3db98ea29d91659f2a89ebbcdc07049de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Vezy?= Date: Wed, 11 Feb 2026 13:32:32 +0100 Subject: [PATCH 1/7] Implement attributes in tables per symbol --- AGENT.md | 11 + Project.toml | 2 + benchmark/Project.toml | 1 + benchmark/README.md | 21 +- benchmark/benchmarks.jl | 274 +++++++++--- docs/src/api.md | 6 + docs/src/tutorials/1.manipulate_node.md | 26 +- .../2.descendants_ancestors_filters.md | 4 +- .../tutorials/7.performance_considerations.md | 13 +- src/MultiScaleTreeGraph.jl | 10 + src/compute_MTG/append_attributes.jl | 15 + src/compute_MTG/columnarize.jl | 25 ++ src/compute_MTG/delete_nodes.jl | 2 + src/compute_MTG/insert_nodes.jl | 29 +- src/compute_MTG/prune.jl | 5 + src/conversion/Tables.jl | 114 +++++ src/read_MTG/parse_mtg.jl | 13 +- src/read_MTG/read_MTG.jl | 6 +- src/types/Attributes.jl | 408 ++++++++++++++++++ src/types/Node.jl | 133 ++++++ test/Project.toml | 1 + test/runtests.jl | 4 + test/test-columnar.jl | 43 ++ 23 files changed, 1084 insertions(+), 82 deletions(-) create mode 100644 src/compute_MTG/columnarize.jl create mode 100644 src/conversion/Tables.jl create mode 100644 src/types/Attributes.jl create mode 100644 test/test-columnar.jl diff --git a/AGENT.md b/AGENT.md index c99dd9c6..6cc0b2cb 100644 --- a/AGENT.md +++ b/AGENT.md @@ -21,6 +21,7 @@ - `src/compute_MTG/indexing.jl` - `src/compute_MTG/check_filters.jl` - `src/types/Node.jl` +- `src/types/Attributes.jl` - `src/compute_MTG/node_funs.jl` ## Practical Optimization Rules @@ -30,4 +31,14 @@ - `descendants!(buffer, node, key; ...)` - Keep filter checks branch-light when no filters are provided. - Keep key access on typed attribute containers (`NamedTuple`, `MutableNamedTuple`, typed dicts) in specialized methods when possible. +- Prefer explicit attribute APIs: + - `attribute(node, key)` + - `attribute!(node, key, value)` + - `attributes(node; format=:namedtuple|:dict)` + - `add_column!` / `drop_column!` / `rename_column!` - Preserve API behavior and add tests for every optimization that changes internals. + +## Benchmark Tiers +- `small` (~10k nodes): full matrix including API-surface benchmarks +- `medium` (~100k nodes): hot-path matrix +- `large` (~300k nodes): critical hot paths only diff --git a/Project.toml b/Project.toml index 1cade87a..f11ef08c 100644 --- a/Project.toml +++ b/Project.toml @@ -14,6 +14,7 @@ MutableNamedTuples = "af6c499f-54b4-48cc-bbd2-094bba7533c7" OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce" +Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" XLSX = "fdbf4ff8-1666-58a4-91e7-1b58723a45e0" [compat] @@ -25,6 +26,7 @@ MetaGraphsNext = "0.5, 0.6, 0.7" MutableNamedTuples = "0.1" OrderedCollections = "1.4" SHA = "0.7, 1" +Tables = "1" XLSX = "0.7, 0.8, 0.9, 0.10" julia = "1.6" diff --git a/benchmark/Project.toml b/benchmark/Project.toml index 9778dd96..a1d26338 100644 --- a/benchmark/Project.toml +++ b/benchmark/Project.toml @@ -1,3 +1,4 @@ [deps] BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" MultiScaleTreeGraph = "dd4a991b-8a45-4075-bede-262ee62d5583" +Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" diff --git a/benchmark/README.md b/benchmark/README.md index 10228f18..5d81609d 100644 --- a/benchmark/README.md +++ b/benchmark/README.md @@ -10,6 +10,21 @@ julia --project=benchmark benchmark/benchmarks.jl Workloads currently covered: -- full-tree traversal -- traversal + data extraction from descendants -- repeated many-small queries (`children`, `parent`, `ancestors`) +- tiered datasets: `small` (~10k nodes), `medium` (~100k), `large` (~300k) +- full-tree traversal and traversal updates + - one attribute update on all nodes + - multi-attribute update on one symbol (`:Leaf`) + - multi-attribute update on mixed symbols (`[:Leaf, :Internode]`) +- descendants queries + - one/many attributes, one symbol + - one/many attributes, mixed symbols + - `ignore_nothing=true/false` + - in-place and allocating variants +- repeated many-small queries + - `children`, `parent`, `ancestors`, `ancestors!` + - repeated descendants retrieval with and without in-place buffers +- API surface (small tier) + - insertion/deletion/pruning + - `transform!`, `select!` + - `symbol_table` / `mtg_table` + - `write_mtg` diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index f35bb4bb..beea4002 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -6,52 +6,84 @@ Pkg.instantiate() using BenchmarkTools using MultiScaleTreeGraph using Random +using Tables const SUITE = BenchmarkGroup() -function synthetic_tree(; branching::Int=4, depth::Int=8, nsamples::Int=256, seed::Int=42) +const SIZE_TIERS = ( + small=10_000, + medium=100_000, + large=300_000, +) + +function synthetic_mtg(; n_nodes::Int=10_000, seed::Int=42) rng = MersenneTwister(seed) root = Node( 1, - NodeMTG("/", "Scale1", 1, 1), - Dict{Symbol,Any}( + MutableNodeMTG(:/, :Plant, 1, 1), + MultiScaleTreeGraph.ColumnarAttrs(Dict{Symbol,Any}( :mass => rand(rng), - :area => rand(rng), :height => rand(rng), - ), + :temperature => 20.0, + )), ) - frontier = [root] + candidates = Node[root] + leaves = Node[] + internodes = Node[] + all_nodes = Node[root] + next_id = 1 + while next_id < n_nodes + parent_ = candidates[rand(rng, eachindex(candidates))] + next_id += 1 - for d in 1:depth - next_frontier = Node{NodeMTG,Dict{Symbol,Any}}[] - sym = "Scale$(d + 1)" - sc = d + 1 - for p in frontier - for b in 1:branching - next_id += 1 - child = Node( - next_id, - p, - NodeMTG("<", sym, b, sc), - Dict{Symbol,Any}( - :mass => rand(rng), - :area => rand(rng), - :height => rand(rng), - ), - ) - push!(next_frontier, child) - end + roll = rand(rng) + if roll < 0.60 + sym = :Internode + scale_ = 2 + link_ = :< + attrs = Dict{Symbol,Any}( + :mass => rand(rng), + :Length => rand(rng), + :Diameter => rand(rng), + ) + elseif roll < 0.95 + sym = :Leaf + scale_ = 3 + link_ = :+ + attrs = Dict{Symbol,Any}( + :mass => rand(rng), + :Width => rand(rng), + :Area => rand(rng), + ) + else + sym = :Axis + scale_ = 2 + link_ = :< + attrs = Dict{Symbol,Any}( + :mass => rand(rng), + :Length => rand(rng), + ) + end + + child = Node(next_id, parent_, MutableNodeMTG(link_, sym, 1, scale_), attrs) + push!(all_nodes, child) + + if sym == :Leaf + push!(leaves, child) + else + push!(internodes, child) + push!(candidates, child) end - frontier = next_frontier end - all_nodes = traverse(root, x -> x, type=typeof(root)) - sample_nodes = rand(rng, all_nodes, min(nsamples, length(all_nodes))) + sample_size = min(512, length(all_nodes)) + sample_nodes = rand(rng, all_nodes, sample_size) + sample_leaves = isempty(leaves) ? sample_nodes : rand(rng, leaves, min(512, length(leaves))) - return root, frontier, sample_nodes + return (root=root, leaves=leaves, internodes=internodes, all_nodes=all_nodes, sample_nodes=sample_nodes, sample_leaves=sample_leaves) end function children_workload(nodes, reps::Int) @@ -79,9 +111,9 @@ function ancestors_workload(nodes, reps::Int) s = 0.0 @inbounds for _ in 1:reps for n in nodes - vals = ancestors(n, :mass, recursivity_level=4, type=Float64) + vals = ancestors(n, :Length, recursivity_level=4, type=Union{Nothing,Float64}) for v in vals - s += v + v === nothing || (s += v) end end end @@ -90,34 +122,163 @@ end function ancestors_workload_inplace(nodes, reps::Int) s = 0.0 - buf = Float64[] + buf = Union{Nothing,Float64}[] @inbounds for _ in 1:reps for n in nodes - ancestors!(buf, n, :mass, recursivity_level=4) + ancestors!(buf, n, :Length, recursivity_level=4) for v in buf - s += v + v === nothing || (s += v) end end end return s end -function descendants_extraction_workload(root) - vals = descendants(root, :mass, type=Float64) +function descendants_repeated_workload(root, reps::Int) s = 0.0 - @inbounds for v in vals - s += v + @inbounds for _ in 1:reps + vals = descendants(root, :Length, symbol=:Internode, ignore_nothing=true, type=Float64) + for v in vals + s += v + end end return s end -function descendants_extraction_workload_inplace_1(root) - descendants!(root, :mass, type=Float64) +function descendants_repeated_workload_inplace(root, reps::Int) + s = 0.0 + buf = Float64[] + @inbounds for _ in 1:reps + descendants!(buf, root, :Length, symbol=:Internode, ignore_nothing=true) + for v in buf + s += v + end + end + return s +end + +function traverse_update_one!(root) + traverse!(root) do node + m = attribute(node, :mass, default=0.0) + attribute!(node, :mass, m + 0.1) + end + return nothing +end + +function traverse_update_multi_leaf!(root) + traverse!(root, symbol=:Leaf) do node + width = attribute(node, :Width, default=0.0) + area = attribute(node, :Area, default=0.0) + attribute!(node, :Width, width * 1.001) + attribute!(node, :Area, area + width) + end + return nothing +end + +function traverse_update_multi_mixed!(root) + traverse!(root, symbol=(:Leaf, :Internode)) do node + m = attribute(node, :mass, default=0.0) + attribute!(node, :mass, m * 0.999 + 0.0001) + attribute!(node, :update_counter, attribute(node, :update_counter, default=0) + 1) + end + return nothing +end + +function descendants_tuple_workload(root) + vals = descendants(root, [:Length, :mass], symbol=:Internode, ignore_nothing=true) + s = 0.0 + for v in vals + s += v[1] + v[2] + end + return s end -function descendants_extraction_workload_inplace_2(root) - vals = Float64[] - descendants!(vals, root, :mass) +function descendants_mixed_one(root; ignore_nothing::Bool) + descendants(root, :Length, symbol=(:Leaf, :Internode), ignore_nothing=ignore_nothing, type=Union{Nothing,Float64}) +end + +function descendants_mixed_many(root; ignore_nothing::Bool) + descendants(root, [:Length, :Area], symbol=(:Leaf, :Internode), ignore_nothing=ignore_nothing) +end + +function build_tier!(suite, tier_name::String, n_nodes::Int) + seed = Int(mod(hash(tier_name), typemax(Int))) + data = synthetic_mtg(n_nodes=n_nodes, seed=seed) + root = data.root + sample_nodes = data.sample_nodes + sample_leaves = data.sample_leaves + + tier = BenchmarkGroup() + suite[tier_name] = tier + + tier["traverse"]["full_tree_nodes"] = @benchmarkable traverse!($root, _ -> nothing) + + tier["traverse_update"]["one_attr_all_nodes"] = @benchmarkable traverse_update_one!($root) + tier["traverse_update"]["multi_attr_leaf_only"] = @benchmarkable traverse_update_multi_leaf!($root) + tier["traverse_update"]["multi_attr_leaf_internode"] = @benchmarkable traverse_update_multi_mixed!($root) + + tier["descendants_query"]["one_attr_one_symbol"] = @benchmarkable descendants($root, :Length, symbol=:Internode, ignore_nothing=true, type=Float64) + tier["descendants_query"]["many_attr_one_symbol"] = @benchmarkable descendants_tuple_workload($root) + tier["descendants_query"]["one_attr_mixed_keep_nothing"] = @benchmarkable descendants_mixed_one($root, ignore_nothing=false) + tier["descendants_query"]["one_attr_mixed_ignore_nothing"] = @benchmarkable descendants_mixed_one($root, ignore_nothing=true) + tier["descendants_query"]["many_attr_mixed_keep_nothing"] = @benchmarkable descendants_mixed_many($root, ignore_nothing=false) + tier["descendants_query"]["many_attr_mixed_ignore_nothing"] = @benchmarkable descendants_mixed_many($root, ignore_nothing=true) + + tier["descendants_query"]["one_attr_one_symbol_inplace"] = @benchmarkable begin + buf = Float64[] + descendants!(buf, $root, :Length, symbol=:Internode, ignore_nothing=true) + end + + tier["many_queries"]["children_repeated"] = @benchmarkable children_workload($sample_nodes, 300) + tier["many_queries"]["parent_repeated"] = @benchmarkable parent_workload($sample_nodes, 300) + tier["many_queries"]["ancestors_repeated"] = @benchmarkable ancestors_workload($sample_leaves, 40) + tier["many_queries"]["ancestors_repeated_inplace"] = @benchmarkable ancestors_workload_inplace($sample_leaves, 40) + tier["many_queries"]["descendants_repeated"] = @benchmarkable descendants_repeated_workload($root, 30) + tier["many_queries"]["descendants_repeated_inplace"] = @benchmarkable descendants_repeated_workload_inplace($root, 30) + + if tier_name == "small" + tier["api_surface_small_only"]["insert_child"] = @benchmarkable begin + data_ = synthetic_mtg(n_nodes=8_000, seed=111) + mtg_ = data_.root + insert_child!(mtg_[1], MutableNodeMTG(:<, :Internode, 1, 2), x -> Dict{Symbol,Any}(:mass => 0.1), [max_id(mtg_)]) + end + + tier["api_surface_small_only"]["delete_node"] = @benchmarkable begin + data_ = synthetic_mtg(n_nodes=8_000, seed=222) + mtg_ = data_.root + target = get_node(mtg_, node_id(mtg_[1])) + delete_node!(target) + end + + tier["api_surface_small_only"]["prune_subtree"] = @benchmarkable begin + data_ = synthetic_mtg(n_nodes=8_000, seed=333) + mtg_ = data_.root + prune!(mtg_[1]) + end + + tier["api_surface_small_only"]["transform"] = @benchmarkable begin + data_ = synthetic_mtg(n_nodes=8_000, seed=444) + mtg_ = data_.root + transform!(mtg_, :mass => (x -> x + 1.0) => :mass2, ignore_nothing=true) + end + + tier["api_surface_small_only"]["select"] = @benchmarkable begin + data_ = synthetic_mtg(n_nodes=8_000, seed=555) + mtg_ = data_.root + select!(mtg_, :mass, :Length, ignore_nothing=true) + end + + tier["api_surface_small_only"]["tables_symbol"] = @benchmarkable symbol_table($root, :Leaf) + tier["api_surface_small_only"]["tables_unified"] = @benchmarkable mtg_table($root) + + tier["api_surface_small_only"]["write_mtg"] = @benchmarkable begin + data_ = synthetic_mtg(n_nodes=3_000, seed=666) + mtg_ = data_.root + f = tempname() * ".mtg" + write_mtg(mtg_, f) + rm(f, force=true) + end + end end suite_name = "mstg" @@ -129,29 +290,14 @@ elseif Sys.islinux() suite_name *= "_linux" end -SUITE[suite_name] = BenchmarkGroup([ - "traverse", - "traverse_extract", - "many_queries", -]) +SUITE[suite_name] = BenchmarkGroup() -root, leaves, sample_nodes = synthetic_tree() -SUITE[suite_name]["traverse"]["full_tree_nodes"] = @benchmarkable traverse!($root, _ -> nothing) -SUITE[suite_name]["traverse_extract"]["descendants_mass"] = @benchmarkable descendants_extraction_workload($root) -SUITE[suite_name]["traverse_extract"]["descendants_mass_inplace_1"] = @benchmarkable descendants_extraction_workload_inplace_1($root) +build_tier!(SUITE[suite_name], "small", SIZE_TIERS.small) +build_tier!(SUITE[suite_name], "medium", SIZE_TIERS.medium) +build_tier!(SUITE[suite_name], "large", SIZE_TIERS.large) -# Add this one only if we have a method for `descendants!(val, node, key, type)` -if hasmethod(descendants!, Tuple{AbstractVector,Node,Symbol}) - SUITE[suite_name]["traverse_extract"]["descendants_mass_inplace_2"] = @benchmarkable descendants_extraction_workload_inplace_2($root) -end - -SUITE[suite_name]["many_queries"]["children_repeated"] = @benchmarkable children_workload($sample_nodes, 300) -SUITE[suite_name]["many_queries"]["parent_repeated"] = @benchmarkable parent_workload($sample_nodes, 300) -SUITE[suite_name]["many_queries"]["ancestors_repeated"] = @benchmarkable ancestors_workload($leaves, 40) -# Test if ancestors! exists in the package first: -if isdefined(MultiScaleTreeGraph, :ancestors!) - SUITE[suite_name]["many_queries"]["ancestors_repeated_inplace"] = @benchmarkable ancestors_workload_inplace($leaves, 40) -end +# Keep the largest tier focused on critical hot paths. +delete!(SUITE[suite_name]["large"], "api_surface_small_only") if abspath(PROGRAM_FILE) == @__FILE__ results = run(SUITE, verbose=true) diff --git a/docs/src/api.md b/docs/src/api.md index c6679784..a13a9b5e 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -2,6 +2,12 @@ Here is a list of all exported functions from MultiScaleTreeGraph.jl. For more details, click on the link and you'll be directed to the function help. +Notable attribute APIs: + +- `attribute`, `attribute!`, `attributes`, `attribute_names` +- `add_column!`, `drop_column!`, `rename_column!` +- `symbol_table`, `mtg_table` (`Tables.jl` sources) + ```@index ``` diff --git a/docs/src/tutorials/1.manipulate_node.md b/docs/src/tutorials/1.manipulate_node.md index d5c3c2b3..0bf81457 100644 --- a/docs/src/tutorials/1.manipulate_node.md +++ b/docs/src/tutorials/1.manipulate_node.md @@ -53,12 +53,12 @@ These fields are considered internal to the package, and are not meant to be acc getfield(mtg, :MTG) ``` -But the preferred way for accessing such values is to use the accessor functions provided by the package: [`parent`](@ref), [`children`](@ref), [`node_attributes`](@ref), [`node_mtg`](@ref), [`node_id`](@ref). +But the preferred way for accessing such values is to use the accessor functions provided by the package: [`parent`](@ref), [`children`](@ref), [`attribute`](@ref), [`attributes`](@ref), [`node_mtg`](@ref), [`node_id`](@ref). So to get the values of the attributes (*i.e.* the variables), you can use: ```@example usepkg -node_attributes(mtg) +attributes(mtg, format=:dict) ``` The `MTG` field from the node helps us describe the node within the MTG. Let's see what's in it: @@ -152,3 +152,25 @@ get_root(node_5) ### Get the attributes This section has its own tutorial! Head over the next page to learn how to get the nodes attributes. + +## Columnar Attribute Backend + +By default, `read_mtg` now loads attributes using a typed columnar backend (per symbol). The node API remains simple: + +```@example usepkg +leaf = get_node(mtg, 5) +attribute(leaf, :Width) +attribute!(leaf, :new_var, 1.0) +attribute_names(leaf) +attributes(leaf, format=:namedtuple) +``` + +For schema-level operations on one symbol: + +```@example usepkg +add_column!(mtg, :Leaf, :temperature, Float64, default=20.0) +rename_column!(mtg, :Leaf, :temperature, :temp) +drop_column!(mtg, :Leaf, :temp) +``` + +`node_attributes(node)` is still available for compatibility, but new code should prefer `attribute`/`attribute!`/`attributes`. diff --git a/docs/src/tutorials/2.descendants_ancestors_filters.md b/docs/src/tutorials/2.descendants_ancestors_filters.md index 5ba2a50d..64e54fc9 100644 --- a/docs/src/tutorials/2.descendants_ancestors_filters.md +++ b/docs/src/tutorials/2.descendants_ancestors_filters.md @@ -54,7 +54,9 @@ The previous notations are both equivalent to: node_attributes(node_5)[:Length] ``` -But we strongly recommend to avoid this last notation. In our case the attributes are stored in a Dictionary (Dict, the default), so we access their values using the Dict notation: `node_attributes(node_5)[:Length]`. But if the attributes are stored as a NamedTuple-alike structure, we must use the dot notation instead: `node_attributes(node_5).Length` (see [Attributes type](@ref) for more details). That is why the package implements the more generic `node_5[:Length]` notation that works with any structure used for the attributes, which helps develop more generic code. +But we strongly recommend to avoid this last notation. First, in our case the attributes are stored in a Dictionary (Dict, the default), so we access their values using the Dict notation: `node_attributes(node_5)[:Length]`. But if the attributes are stored as a NamedTuple-alike structure, we must use the dot notation instead: `node_attributes(node_5).Length` (see [Attributes type](@ref) for more details). Second, the attributes of the nodes are not stored in the structure returned by `node_attributes`, so updating the value of an attribute using this notation does not update the value in the MTG. This is a common mistake that can lead to very hard-to-debug issues. + + That is why the package implements the more generic `node_5[:Length]` notation that works with any structure used for the attributes, which helps develop more generic code. To get the names of all attributes available in the node subtree, you can use [`get_attributes`](@ref): diff --git a/docs/src/tutorials/7.performance_considerations.md b/docs/src/tutorials/7.performance_considerations.md index d26a0efd..afaf3afc 100644 --- a/docs/src/tutorials/7.performance_considerations.md +++ b/docs/src/tutorials/7.performance_considerations.md @@ -8,13 +8,18 @@ It is important to note that MultiScaleTreeGraph.jl is high-performance by desig ## Performance Tips -### Attribute types +### Attribute backend -The type used to store the attributes in the nodes are completely free, and can have a significant impact on performance. By default, the attributes are stored in a `Dict{Symbol, Any}`. This is a very flexible type that allows for getting attributes by name, updating their values, and adding or deleting attributes. However, this flexibility comes at a cost. The `Dict` type is fast but slower than more optimized types. +By default, `read_mtg` uses `ColumnarStore`, a typed per-symbol columnar backend. This is optimized for traversal + attribute extraction workloads and reduces repeated dictionary lookups in hot loops. -If the user just wants to read an MTG and extract information from it, but not update values or add new attributes, it is recommended to use a `NamedTuple` instead. This will improve performance significantly. +The explicit attribute API is: -If the user still want to update values, but does not need to repeatedly add or delete attributes, it is recommended to use a `MutableNamedTuple`. This will improve performance significantly over `Dict`s. +- `attribute(node, :key; default=nothing)` +- `attribute!(node, :key, value)` +- `attributes(node, format=:namedtuple | :dict)` +- `add_column!`, `drop_column!`, `rename_column!` for schema updates + +Legacy containers (`Dict`, `NamedTuple`, `MutableNamedTuple`) are still available by passing `attr_type` explicitly to `read_mtg`. ### MTG encoding diff --git a/src/MultiScaleTreeGraph.jl b/src/MultiScaleTreeGraph.jl index f25a5f71..63c21aa4 100644 --- a/src/MultiScaleTreeGraph.jl +++ b/src/MultiScaleTreeGraph.jl @@ -12,11 +12,13 @@ import DataFrames: DataFrame, insertcols! import DataFrames: transform!, transform # We define our own version for transforming the MTG import DataFrames: select!, select # We define our own version for transforming the MTG import DataFrames: rename! # We define our own version for renaming node attributes +import Tables import MetaGraphsNext: MetaGraph, code_for, add_edge! # Transform to MetaGraph import Graphs.DiGraph import Dates: Date, @dateformat_str, format include("types/AbstractNodeMTG.jl") +include("types/Attributes.jl") include("types/Node.jl") include("read_MTG/read_MTG.jl") include("read_MTG/strip_comments.jl") @@ -35,6 +37,7 @@ include("compute_MTG/check_filters.jl") include("compute_MTG/mutation.jl") include("compute_MTG/append_attributes.jl") include("compute_MTG/traverse.jl") +include("compute_MTG/columnarize.jl") include("compute_MTG/transform.jl") include("compute_MTG/select.jl") include("compute_MTG/delete_nodes.jl") @@ -48,6 +51,7 @@ include("compute_MTG/nleaves.jl") include("compute_MTG/pipe_model.jl") include("compute_MTG/get_node.jl") include("conversion/DataFrame.jl") +include("conversion/Tables.jl") include("conversion/MetaGraph.jl") export read_mtg @@ -91,6 +95,12 @@ export names, scales, symbols, components export node_id, node_mtg, node_attributes export symbol, scale, index, link export symbol!, scale!, index!, link! +export ColumnarStore +export Column, SymbolBucket, MTGAttributeStore, NodeAttrRef +export attribute, attribute!, attributes, attribute_names +export add_column!, drop_column!, rename_column! +export columnarize! +export symbol_table, mtg_table export list_nodes export get_classes export get_description diff --git a/src/compute_MTG/append_attributes.jl b/src/compute_MTG/append_attributes.jl index cf51a5ef..1201ebe4 100755 --- a/src/compute_MTG/append_attributes.jl +++ b/src/compute_MTG/append_attributes.jl @@ -20,6 +20,13 @@ function Base.append!(node::Node{M,T}, attr::T) where {M<:AbstractNodeMTG,T<:Abs merge!(node_attributes(node), attr) end +function Base.append!(node::Node{<:AbstractNodeMTG,ColumnarAttrs}, attr) + for (k, v) in pairs(attr) + attribute!(node, k, v) + end + return node +end + # And ensure compatibility between both so a script wouldn't be broken if we just change the # type of the attributes: function Base.append!(node::Node{<:AbstractNodeMTG,<:AbstractDict}, attr) @@ -46,6 +53,10 @@ function Base.pop!(node::Node{<:AbstractNodeMTG,<:AbstractDict}, key) return poped_value end +function Base.pop!(node::Node{<:AbstractNodeMTG,ColumnarAttrs}, key) + pop!(node_attributes(node), key, nothing) +end + # Renaming attributes: function rename!(node::Node{M,T}, old_new) where {M<:AbstractNodeMTG,T<:MutableNamedTuple} attr_keys = replace([i for i in keys(node_attributes(node))], old_new) @@ -63,3 +74,7 @@ function rename!(node::Node{<:AbstractNodeMTG,<:AbstractDict}, old_new) first(kv) == first(old_new) ? last(old_new) => last(kv) : kv end end + +function rename!(node::Node{<:AbstractNodeMTG,ColumnarAttrs}, old_new) + rename_column!(node, symbol(node), Symbol(first(old_new)), Symbol(last(old_new))) +end diff --git a/src/compute_MTG/columnarize.jl b/src/compute_MTG/columnarize.jl new file mode 100644 index 00000000..f7182972 --- /dev/null +++ b/src/compute_MTG/columnarize.jl @@ -0,0 +1,25 @@ +""" + columnarize!(mtg::Node) + +Bind all node attributes to a single `MTGAttributeStore`. +""" +function columnarize!(mtg::Node) + nodes = traverse(mtg, node -> node, type=typeof(mtg)) + isempty(nodes) && return mtg + + store = MTGAttributeStore() + for n in nodes + attrs = node_attributes(n) + attrs isa ColumnarAttrs || error("columnarize! expects nodes with ColumnarAttrs attributes.") + raw = _isbound(attrs) ? Dict{Symbol,Any}(pairs(attrs)) : attrs.staged + _add_node_with_attrs!(store, node_id(n), symbol(n), raw) + end + + for n in nodes + attrs = node_attributes(n)::ColumnarAttrs + attrs.ref.store = store + attrs.ref.node_id = node_id(n) + empty!(attrs.staged) + end + return mtg +end diff --git a/src/compute_MTG/delete_nodes.jl b/src/compute_MTG/delete_nodes.jl index 007b46db..cd07aefa 100644 --- a/src/compute_MTG/delete_nodes.jl +++ b/src/compute_MTG/delete_nodes.jl @@ -150,6 +150,8 @@ function delete_node!(node::Node{N,A}; child_link_fun=new_child_link) where {N<: end # Clean the old deleted node (isolate it, no parent, no children): + attrs = node_attributes(node) + attrs isa ColumnarAttrs && remove_columnar_node!(attrs) reparent!(node, nothing) rechildren!(node, Node{N,A}[]) diff --git a/src/compute_MTG/insert_nodes.jl b/src/compute_MTG/insert_nodes.jl index d085ace8..89cbad55 100644 --- a/src/compute_MTG/insert_nodes.jl +++ b/src/compute_MTG/insert_nodes.jl @@ -266,6 +266,23 @@ insert_parent!( """ insert_parent!, insert_generation!, insert_child!, insert_sibling! +@inline function _coerce_insert_attrs(::Type{A}, attrs) where {A} + attrs +end + +@inline function _coerce_insert_attrs(::Type{ColumnarAttrs}, attrs) + _to_columnar_attrs(attrs) +end + +@inline function _bind_inserted_columnar!(::Type{A}, parent_for_store::Node, inserted::Node) where {A} + nothing +end + +function _bind_inserted_columnar!(::Type{ColumnarAttrs}, parent_for_store::Node, inserted::Node) + bind_columnar_child!(node_attributes(parent_for_store), node_attributes(inserted), node_id(inserted), symbol(inserted)) + return nothing +end + function insert_parent!(node::Node{N,A}, template, attr_fun=node -> A(), maxid=[max_id(node)]) where {N<:AbstractNodeMTG,A} maxid[1] += 1 @@ -277,9 +294,10 @@ function insert_parent!(node::Node{N,A}, template, attr_fun=node -> A(), maxid=[ nothing, Node{N,A}[node], new_node_MTG(node, template), - copy(attr_fun(node)), + _coerce_insert_attrs(A, copy(attr_fun(node))), Dict{String,Vector{Node{N,A}}}() ) + _bind_inserted_columnar!(A, node, new_node) # Add to the new root the mandatory root attributes: root_attrs = Dict( @@ -298,9 +316,10 @@ function insert_parent!(node::Node{N,A}, template, attr_fun=node -> A(), maxid=[ parent(node), Node{N,A}[node], new_node_MTG(node, template), - copy(attr_fun(node)), + _coerce_insert_attrs(A, copy(attr_fun(node))), Dict{String,Vector{Node{N,A}}}() ) + _bind_inserted_columnar!(A, parent(node), new_node) # Add the new node to the parent: deleteat!(children(parent(node)), findfirst(x -> node_id(x) == node_id(node), children(parent(node)))) @@ -336,9 +355,10 @@ function insert_sibling!(node::Node{N,A}, template, attr_fun=node -> A(), maxid= parent(node), Vector{Node{N,A}}(), new_node_MTG(node, template), - copy(attr_fun(node)), + _coerce_insert_attrs(A, copy(attr_fun(node))), Dict{String,Vector{Node{N,A}}}() ) + _bind_inserted_columnar!(A, parent(node), new_node) # Add the new node to the children of the parent node: push!(children(parent(node)), new_node) @@ -355,9 +375,10 @@ function insert_generation!(node::Node{N,A}, template, attr_fun=node -> A(), max node, children(node), new_node_MTG(node, template), - copy(attr_fun(node)), + _coerce_insert_attrs(A, copy(attr_fun(node))), Dict{String,Vector{Node{N,A}}}() ) + _bind_inserted_columnar!(A, node, new_node) # Add the new node as the only child of the node: rechildren!(node, Node{N,A}[new_node]) diff --git a/src/compute_MTG/prune.jl b/src/compute_MTG/prune.jl index 7df36036..140aa1c0 100644 --- a/src/compute_MTG/prune.jl +++ b/src/compute_MTG/prune.jl @@ -23,6 +23,11 @@ function prune!(node) else parent_node = parent(node) + traverse!(node) do n + attrs = node_attributes(n) + attrs isa ColumnarAttrs && remove_columnar_node!(attrs) + end + # Delete the node as child of his parent: deleteat!(children(parent_node), findfirst(x -> node_id(x) == node_id(node), children(parent_node))) end diff --git a/src/conversion/Tables.jl b/src/conversion/Tables.jl new file mode 100644 index 00000000..f1f09ec0 --- /dev/null +++ b/src/conversion/Tables.jl @@ -0,0 +1,114 @@ +struct MTGTableView + names::Vector{Symbol} + name_to_idx::Dict{Symbol,Int} + cols::Vector{AbstractVector} +end + +function MTGTableView(names::Vector{Symbol}, cols::Vector{AbstractVector}) + idx = Dict{Symbol,Int}() + @inbounds for i in eachindex(names) + idx[names[i]] = i + end + MTGTableView(names, idx, cols) +end + +Tables.istable(::Type{MTGTableView}) = true +Tables.columnaccess(::Type{MTGTableView}) = true +Tables.columns(t::MTGTableView) = t +Tables.columnnames(t::MTGTableView) = Tuple(t.names) +Tables.getcolumn(t::MTGTableView, i::Int) = t.cols[i] +Tables.getcolumn(t::MTGTableView, name::Symbol) = t.cols[t.name_to_idx[name]] +Tables.schema(t::MTGTableView) = Tables.Schema(Tuple(t.names), Tuple(eltype.(t.cols))) + +@inline function _value_type_or_missing(values::Vector{Any}) + T = Union{} + has_missing = false + @inbounds for v in values + if v === missing + has_missing = true + else + T = T === Union{} ? typeof(v) : typejoin(T, typeof(v)) + end + end + + if T === Union{} + return Missing + end + return has_missing ? Union{Missing,T} : T +end + +function _typed_column(values::Vector{Any}) + T = _value_type_or_missing(values) + out = Vector{T}(undef, length(values)) + @inbounds for i in eachindex(values) + out[i] = values[i] + end + out +end + +function _collect_attr_names(nodes) + out = Symbol[] + seen = Set{Symbol}() + for n in nodes + for key in attribute_names(n) + if !(key in seen) + push!(seen, key) + push!(out, key) + end + end + end + out +end + +function _build_attr_columns(nodes, attr_names) + cols = Vector{AbstractVector}(undef, length(attr_names)) + @inbounds for i in eachindex(attr_names) + key = attr_names[i] + values = Vector{Any}(undef, length(nodes)) + for j in eachindex(nodes) + v = attribute(nodes[j], key, default=nothing) + values[j] = v === nothing ? missing : v + end + cols[i] = _typed_column(values) + end + cols +end + +function symbol_table(mtg::Node, symbol::Symbol) + nodes = traverse(mtg, node -> node, symbol=symbol, type=typeof(mtg)) + attr_names = _collect_attr_names(nodes) + attr_cols = _build_attr_columns(nodes, attr_names) + + names = Symbol[:node_id] + cols = Vector{AbstractVector}(undef, 1 + length(attr_cols)) + cols[1] = [node_id(n) for n in nodes] + + @inbounds for i in eachindex(attr_names) + push!(names, attr_names[i]) + cols[i+1] = attr_cols[i] + end + + MTGTableView(names, cols) +end + +function mtg_table(mtg::Node) + nodes = traverse(mtg, node -> node, type=typeof(mtg)) + attr_names = _collect_attr_names(nodes) + attr_cols = _build_attr_columns(nodes, attr_names) + + names = Symbol[:node_id, :symbol, :scale, :index, :link, :parent_id] + cols = Vector{AbstractVector}(undef, 6 + length(attr_cols)) + cols[1] = [node_id(n) for n in nodes] + cols[2] = [symbol(n) for n in nodes] + cols[3] = [scale(n) for n in nodes] + cols[4] = [index(n) for n in nodes] + cols[5] = [link(n) for n in nodes] + cols[6] = [isroot(n) ? missing : node_id(parent(n)) for n in nodes] + + @inbounds for i in eachindex(attr_names) + push!(names, attr_names[i]) + cols[i+6] = attr_cols[i] + end + + MTGTableView(names, cols) +end diff --git a/src/read_MTG/parse_mtg.jl b/src/read_MTG/parse_mtg.jl index 17a7d8a6..66eab50d 100644 --- a/src/read_MTG/parse_mtg.jl +++ b/src/read_MTG/parse_mtg.jl @@ -237,6 +237,14 @@ function parse_node_attributes(attr_type::Type{T}, node_attr) where {T<:Union{Ab Dict{Symbol,Any}(zip(Symbol.(keys(node_attr)), values(node_attr))) end +function parse_node_attributes(::Type{ColumnarStore}, node_attr) + ColumnarAttrs(Dict{Symbol,Any}(zip(Symbol.(keys(node_attr)), values(node_attr)))) +end + +function parse_node_attributes(::Type{ColumnarAttrs}, node_attr) + ColumnarAttrs(Dict{Symbol,Any}(zip(Symbol.(keys(node_attr)), values(node_attr)))) +end + # node_attributes for DataFrame function parse_node_attributes(::Type{DataFrame}, node_attr) DataFrame(node_attr) @@ -250,6 +258,9 @@ function init_empty_attr(attr_type::Type{T}) where {T<:Union{AbstractDict}} attr_type{Symbol,Any}() end +init_empty_attr(::Type{ColumnarStore}) = ColumnarAttrs() +init_empty_attr(::Type{ColumnarAttrs}) = ColumnarAttrs() + """ parse_line_to_node!(tree_dict, l, line, attr_column_start, node_id, attr_type, mtg_type, features,classes) @@ -383,4 +394,4 @@ function parse_line_to_node!(tree_dict, l, line, attr_column_start, last_node_co # Increment node unique ID: node_id[1] = node_id[1] + 1 end -end \ No newline at end of file +end diff --git a/src/read_MTG/read_MTG.jl b/src/read_MTG/read_MTG.jl index 1a01d8a1..2d19f6f5 100644 --- a/src/read_MTG/read_MTG.jl +++ b/src/read_MTG/read_MTG.jl @@ -1,12 +1,12 @@ """ - read_mtg(file, attr_type = Dict, mtg_type = MutableNodeMTG; sheet_name = nothing) + read_mtg(file, attr_type = ColumnarStore, mtg_type = MutableNodeMTG; sheet_name = nothing) Read an MTG file # Arguments - `file::String`: The path to the MTG file. -- `attr_type::DataType = Dict`: the type used to hold the attribute values for each node. +- `attr_type::DataType = ColumnarStore`: the type used to hold the attribute values for each node. - `mtg_type = MutableNodeMTG`: the type used to hold the mtg encoding for each node (*i.e.* link, symbol, index, scale). See details section below. - `sheet_name = nothing`: the sheet name in case you're reading an `xlsx` or `xlsm` file. It @@ -51,7 +51,7 @@ file = joinpath(dirname(dirname(pathof(MultiScaleTreeGraph))),"test","files","tr mtg = read_mtg(file) ``` """ -function read_mtg(file, attr_type=Dict, mtg_type=MutableNodeMTG; sheet_name=nothing) +function read_mtg(file, attr_type=ColumnarStore, mtg_type=MutableNodeMTG; sheet_name=nothing) file_extension = splitext(basename(file))[2] if file_extension == ".xlsx" || file_extension == ".xlsm" diff --git a/src/types/Attributes.jl b/src/types/Attributes.jl new file mode 100644 index 00000000..50dcec9f --- /dev/null +++ b/src/types/Attributes.jl @@ -0,0 +1,408 @@ +""" +Marker type used by `read_mtg` to request the columnar attribute backend. +""" +struct ColumnarStore end + +mutable struct Column{T} + name::Symbol + data::Vector{T} + default::T +end + +mutable struct SymbolBucket + symbol::Symbol + row_to_node::Vector{Int} + node_to_row::Dict{Int,Int} + col_index::Dict{Symbol,Int} + columns::Vector{Any} # stores Column{T} + col_types::Vector{Any} +end + +function SymbolBucket(symbol::Symbol) + SymbolBucket(symbol, Int[], Dict{Int,Int}(), Dict{Symbol,Int}(), Any[], Any[]) +end + +mutable struct MTGAttributeStore + symbol_to_bucket::Dict{Symbol,Int} + buckets::Vector{SymbolBucket} + node_bucket::Vector{Int} + node_row::Vector{Int} +end + +function MTGAttributeStore() + MTGAttributeStore(Dict{Symbol,Int}(), SymbolBucket[], Int[], Int[]) +end + +mutable struct NodeAttrRef + store::Union{Nothing,MTGAttributeStore} + node_id::Int +end + +mutable struct ColumnarAttrs <: AbstractDict{Symbol,Any} + ref::NodeAttrRef + staged::Dict{Symbol,Any} +end + +ColumnarAttrs() = ColumnarAttrs(NodeAttrRef(nothing, 0), Dict{Symbol,Any}()) +function ColumnarAttrs(d::AbstractDict) + staged = Dict{Symbol,Any}() + for (k, v) in d + staged[Symbol(k)] = v + end + ColumnarAttrs(NodeAttrRef(nothing, 0), staged) +end + +@inline _isbound(attrs::ColumnarAttrs) = attrs.ref.store !== nothing && attrs.ref.node_id > 0 + +@inline function _ensure_node_capacity!(store::MTGAttributeStore, node_id::Int) + if node_id > length(store.node_bucket) + old = length(store.node_bucket) + resize!(store.node_bucket, node_id) + resize!(store.node_row, node_id) + @inbounds for i in old+1:node_id + store.node_bucket[i] = 0 + store.node_row[i] = 0 + end + end + return nothing +end + +@inline function _normalize_attr_key(key) + key isa Symbol ? key : Symbol(key) +end + +function _get_or_create_bucket!(store::MTGAttributeStore, symbol::Symbol) + bid = get(store.symbol_to_bucket, symbol, 0) + if bid == 0 + push!(store.buckets, SymbolBucket(symbol)) + bid = length(store.buckets) + store.symbol_to_bucket[symbol] = bid + end + return bid +end + +@inline function _column(bucket::SymbolBucket, col_idx::Int) + bucket.columns[col_idx] +end + +function _widen_column!(bucket::SymbolBucket, col_idx::Int, ::Type{NewT}) where {NewT} + old_col = bucket.columns[col_idx] + old_data = old_col.data + new_data = Vector{NewT}(undef, length(old_data)) + @inbounds for i in eachindex(old_data) + new_data[i] = old_data[i] + end + new_default = convert(NewT, old_col.default) + bucket.columns[col_idx] = Column{NewT}(old_col.name, new_data, new_default) + bucket.col_types[col_idx] = NewT + return bucket.columns[col_idx] +end + +function _ensure_column_type!(bucket::SymbolBucket, col_idx::Int, value) + T = bucket.col_types[col_idx] + value_T = typeof(value) + if value_T <: T + return _column(bucket, col_idx) + end + NewT = Union{T,value_T} + return _widen_column!(bucket, col_idx, NewT) +end + +function _add_column_internal!(bucket::SymbolBucket, key::Symbol, ::Type{T}, default::T) where {T} + haskey(bucket.col_index, key) && error("Column $(key) already exists for symbol $(bucket.symbol).") + nrows = length(bucket.row_to_node) + col = Column{T}(key, fill(default, nrows), default) + push!(bucket.columns, col) + push!(bucket.col_types, T) + bucket.col_index[key] = length(bucket.columns) + return nothing +end + +function _add_nullable_column_internal!(bucket::SymbolBucket, key::Symbol, ::Type{T}) where {T} + TT = Union{Nothing,T} + _add_column_internal!(bucket, key, TT, nothing) +end + +function _set_value!(bucket::SymbolBucket, row::Int, key::Symbol, value) + col_idx = get(bucket.col_index, key, 0) + if col_idx == 0 + if value === nothing + _add_nullable_column_internal!(bucket, key, Any) + else + _add_nullable_column_internal!(bucket, key, typeof(value)) + end + col_idx = bucket.col_index[key] + end + + col = _ensure_column_type!(bucket, col_idx, value) + col.data[row] = value + return value +end + +function _get_value(bucket::SymbolBucket, row::Int, key::Symbol, default=nothing) + col_idx = get(bucket.col_index, key, 0) + col_idx == 0 && return default + _column(bucket, col_idx).data[row] +end + +function _bucket_row(ref::NodeAttrRef) + store = ref.store + store === nothing && error("Columnar attributes are not bound to a store.") + node_id = ref.node_id + node_id <= 0 && error("Invalid node id for columnar attributes: $(node_id)") + node_id > length(store.node_bucket) && error("Node id $(node_id) is outside of store bounds.") + bid = store.node_bucket[node_id] + row = store.node_row[node_id] + bid == 0 && error("Node id $(node_id) is no longer attached to the attribute store.") + return store, bid, row +end + +function _add_node_with_attrs!(store::MTGAttributeStore, node_id::Int, symbol::Symbol, attrs::AbstractDict) + _ensure_node_capacity!(store, node_id) + bid = _get_or_create_bucket!(store, symbol) + bucket = store.buckets[bid] + + row = length(bucket.row_to_node) + 1 + push!(bucket.row_to_node, node_id) + bucket.node_to_row[node_id] = row + + @inbounds for i in eachindex(bucket.columns) + col = bucket.columns[i] + push!(col.data, col.default) + end + + store.node_bucket[node_id] = bid + store.node_row[node_id] = row + + for (k, v) in attrs + _set_value!(bucket, row, _normalize_attr_key(k), v) + end + + return nothing +end + +function _remove_node!(store::MTGAttributeStore, node_id::Int) + node_id > length(store.node_bucket) && return nothing + bid = store.node_bucket[node_id] + bid == 0 && return nothing + + bucket = store.buckets[bid] + row = bucket.node_to_row[node_id] + last_row = length(bucket.row_to_node) + moved_node = 0 + + if row != last_row + moved_node = bucket.row_to_node[last_row] + bucket.row_to_node[row] = moved_node + bucket.node_to_row[moved_node] = row + end + pop!(bucket.row_to_node) + delete!(bucket.node_to_row, node_id) + + @inbounds for i in eachindex(bucket.columns) + col = bucket.columns[i] + if row != last_row + col.data[row] = col.data[last_row] + end + pop!(col.data) + end + + if moved_node != 0 + store.node_row[moved_node] = row + end + store.node_bucket[node_id] = 0 + store.node_row[node_id] = 0 + return nothing +end + +function _drop_column_internal!(bucket::SymbolBucket, key::Symbol) + col_idx = get(bucket.col_index, key, 0) + col_idx == 0 && return false + delete!(bucket.col_index, key) + deleteat!(bucket.columns, col_idx) + deleteat!(bucket.col_types, col_idx) + for i in col_idx:length(bucket.columns) + bucket.col_index[_column(bucket, i).name] = i + end + return true +end + +function _rename_column_internal!(bucket::SymbolBucket, from::Symbol, to::Symbol) + from_idx = get(bucket.col_index, from, 0) + from_idx == 0 && return false + haskey(bucket.col_index, to) && error("Column $(to) already exists for symbol $(bucket.symbol).") + delete!(bucket.col_index, from) + bucket.col_index[to] = from_idx + _column(bucket, from_idx).name = to + return true +end + +function _store_for_node_attrs(attrs::ColumnarAttrs) + _isbound(attrs) || return nothing + attrs.ref.store +end + +function init_columnar_root!(attrs::ColumnarAttrs, node_id::Int, symbol::Symbol) + _isbound(attrs) && return attrs + store = MTGAttributeStore() + _add_node_with_attrs!(store, node_id, symbol, attrs.staged) + attrs.ref.store = store + attrs.ref.node_id = node_id + empty!(attrs.staged) + return attrs +end + +function bind_columnar_child!(parent_attrs::ColumnarAttrs, child_attrs::ColumnarAttrs, node_id::Int, symbol::Symbol) + if _isbound(child_attrs) + child_attrs.ref.node_id = node_id + return child_attrs + end + + store = _store_for_node_attrs(parent_attrs) + store === nothing && error("Parent node is not attached to a columnar attribute store.") + _add_node_with_attrs!(store, node_id, symbol, child_attrs.staged) + child_attrs.ref.store = store + child_attrs.ref.node_id = node_id + empty!(child_attrs.staged) + return child_attrs +end + +function remove_columnar_node!(attrs::ColumnarAttrs) + store = _store_for_node_attrs(attrs) + store === nothing && return nothing + _remove_node!(store, attrs.ref.node_id) + attrs.ref.node_id = 0 + return nothing +end + +function add_column!(store::MTGAttributeStore, symbol::Symbol, key::Symbol, ::Type{T}; default::T) where {T} + bid = _get_or_create_bucket!(store, symbol) + _add_column_internal!(store.buckets[bid], key, T, default) + return store +end + +function drop_column!(store::MTGAttributeStore, symbol::Symbol, key::Symbol) + bid = get(store.symbol_to_bucket, symbol, 0) + bid == 0 && return store + _drop_column_internal!(store.buckets[bid], key) + return store +end + +function rename_column!(store::MTGAttributeStore, symbol::Symbol, from::Symbol, to::Symbol) + bid = get(store.symbol_to_bucket, symbol, 0) + bid == 0 && return store + _rename_column_internal!(store.buckets[bid], from, to) + return store +end + +function Base.length(attrs::ColumnarAttrs) + if !_isbound(attrs) + return length(attrs.staged) + end + _, bid, _ = _bucket_row(attrs.ref) + return length(attrs.ref.store.buckets[bid].columns) +end + +function Base.keys(attrs::ColumnarAttrs) + if !_isbound(attrs) + return collect(keys(attrs.staged)) + end + _, bid, _ = _bucket_row(attrs.ref) + bucket = attrs.ref.store.buckets[bid] + out = Vector{Symbol}(undef, length(bucket.columns)) + @inbounds for i in eachindex(bucket.columns) + out[i] = _column(bucket, i).name + end + out +end + +function Base.haskey(attrs::ColumnarAttrs, key::Symbol) + if !_isbound(attrs) + return haskey(attrs.staged, key) + end + _, bid, _ = _bucket_row(attrs.ref) + haskey(attrs.ref.store.buckets[bid].col_index, key) +end +Base.haskey(attrs::ColumnarAttrs, key) = haskey(attrs, _normalize_attr_key(key)) + +function Base.getindex(attrs::ColumnarAttrs, key::Symbol) + if !_isbound(attrs) + return attrs.staged[key] + end + store, bid, row = _bucket_row(attrs.ref) + bucket = store.buckets[bid] + col_idx = get(bucket.col_index, key, 0) + col_idx == 0 && throw(KeyError(key)) + return _column(bucket, col_idx).data[row] +end +Base.getindex(attrs::ColumnarAttrs, key) = getindex(attrs, _normalize_attr_key(key)) + +function Base.get(attrs::ColumnarAttrs, key::Symbol, default) + if !_isbound(attrs) + return get(attrs.staged, key, default) + end + store, bid, row = _bucket_row(attrs.ref) + return _get_value(store.buckets[bid], row, key, default) +end +Base.get(attrs::ColumnarAttrs, key, default) = get(attrs, _normalize_attr_key(key), default) + +function Base.setindex!(attrs::ColumnarAttrs, value, key::Symbol) + if !_isbound(attrs) + attrs.staged[key] = value + return value + end + store, bid, row = _bucket_row(attrs.ref) + _set_value!(store.buckets[bid], row, key, value) + return value +end +Base.setindex!(attrs::ColumnarAttrs, value, key) = setindex!(attrs, value, _normalize_attr_key(key)) + +function Base.iterate(attrs::ColumnarAttrs, state=nothing) + if !_isbound(attrs) + return state === nothing ? iterate(attrs.staged) : iterate(attrs.staged, state) + end + k = keys(attrs) + i = state === nothing ? 1 : state + i > length(k) && return nothing + key = k[i] + return (key => get(attrs, key, nothing), i + 1) +end + +function Base.pop!(attrs::ColumnarAttrs, key, default=nothing) + key_ = _normalize_attr_key(key) + if !_isbound(attrs) + return pop!(attrs.staged, key_, default) + end + + store, bid, row = _bucket_row(attrs.ref) + bucket = store.buckets[bid] + col_idx = get(bucket.col_index, key_, 0) + col_idx == 0 && return default + old = _column(bucket, col_idx).data[row] + _drop_column_internal!(bucket, key_) + return old +end + +function Base.delete!(attrs::ColumnarAttrs, key) + pop!(attrs, key, nothing) + return attrs +end + +function Base.empty!(attrs::ColumnarAttrs) + if !_isbound(attrs) + empty!(attrs.staged) + return attrs + end + store, bid, _ = _bucket_row(attrs.ref) + bucket = store.buckets[bid] + empty!(bucket.col_index) + empty!(bucket.columns) + empty!(bucket.col_types) + return attrs +end + +Base.copy(attrs::ColumnarAttrs) = Dict{Symbol,Any}(pairs(attrs)) + +function Base.show(io::IO, attrs::ColumnarAttrs) + print(io, "ColumnarAttrs(", Dict{Symbol,Any}(pairs(attrs)), ")") +end diff --git a/src/types/Node.jl b/src/types/Node.jl index 6743e3b4..7f20e873 100644 --- a/src/types/Node.jl +++ b/src/types/Node.jl @@ -71,6 +71,14 @@ function Node(id::Int, MTG::T, attributes::A) where {T<:AbstractNodeMTG,A} Node(id, nothing, Vector{Node{T,A}}(), MTG, attributes, Dict{String,Vector{Node{T,A}}}()) end +function Node(id::Int, MTG::T, attributes::ColumnarAttrs) where {T<:AbstractNodeMTG} + node = Node{T,ColumnarAttrs}( + id, nothing, Vector{Node{T,ColumnarAttrs}}(), MTG, attributes, Dict{String,Vector{Node{T,ColumnarAttrs}}}() + ) + init_columnar_root!(attributes, id, getfield(MTG, :symbol)) + return node +end + # If the id is not given, it is the root node, so we use 1 Node(MTG::T, attributes) where {T<:AbstractNodeMTG} = Node(1, MTG, attributes) # Not attributes given, by default we use Dict: @@ -95,6 +103,43 @@ function Node(id::Int, parent::Node{M,A}, MTG::M, attributes::A) where {M<:Abstr return node end +function _to_columnar_attrs(attributes::ColumnarAttrs) + attributes +end + +function _to_columnar_attrs(attributes::AbstractDict) + ColumnarAttrs(attributes) +end + +function _to_columnar_attrs(attributes::NamedTuple) + ColumnarAttrs(Dict{Symbol,Any}(pairs(attributes))) +end + +function _to_columnar_attrs(attributes::MutableNamedTuple) + ColumnarAttrs(Dict{Symbol,Any}(pairs(attributes))) +end + +function Node(id::Int, parent::Node{M,ColumnarAttrs}, MTG::M, attributes::ColumnarAttrs) where {M<:AbstractNodeMTG} + node = Node{M,ColumnarAttrs}( + id, parent, Vector{Node{M,ColumnarAttrs}}(), MTG, attributes, Dict{String,Vector{Node{M,ColumnarAttrs}}}() + ) + addchild!(parent, node) + bind_columnar_child!(node_attributes(parent), attributes, id, getfield(MTG, :symbol)) + return node +end + +function Node(id::Int, parent::Node{M,ColumnarAttrs}, MTG::M, attributes::AbstractDict) where {M<:AbstractNodeMTG} + Node(id, parent, MTG, _to_columnar_attrs(attributes)) +end + +function Node(id::Int, parent::Node{M,ColumnarAttrs}, MTG::M, attributes::NamedTuple) where {M<:AbstractNodeMTG} + Node(id, parent, MTG, _to_columnar_attrs(attributes)) +end + +function Node(id::Int, parent::Node{M,ColumnarAttrs}, MTG::M, attributes::MutableNamedTuple) where {M<:AbstractNodeMTG} + Node(id, parent, MTG, _to_columnar_attrs(attributes)) +end + function Node(id::Int, parent::Node{M,A}, MTG::T, attributes::A) where {M<:AbstractNodeMTG,A,T<:AbstractNodeMTG} error( "The parent node has an MTG encoding of type `$(M)`, but the MTG encoding you provide is of type `$(T)`,", @@ -347,6 +392,94 @@ and should not be used directly. Use *e.g.* `node.key = value` to set a single a """ node_attributes!(node::Node{T,A}, attributes::A) where {T,A} = setfield!(node, :attributes, attributes) +""" + attribute(node::Node, key::Symbol; default=nothing) + +Get one attribute from a node. +""" +attribute(node::Node, key::Symbol; default=nothing) = get(node_attributes(node), key, default) +attribute(node::Node, key; default=nothing) = attribute(node, Symbol(key), default=default) + +""" + attribute!(node::Node, key::Symbol, value) + +Set one attribute on a node. +""" +function attribute!(node::Node, key::Symbol, value) + node_attributes(node)[key] = value + return value +end +attribute!(node::Node, key, value) = attribute!(node, Symbol(key), value) + +""" + attributes(node::Node; format=:namedtuple) + +Get all attributes from a node as a snapshot. +""" +function attributes(node::Node; format=:namedtuple) + attrs = node_attributes(node) + if format == :dict + return Dict{Symbol,Any}(pairs(attrs)) + elseif format == :namedtuple + k = collect(keys(attrs)) + vals = map(key -> get(attrs, key, nothing), k) + return NamedTuple{Tuple(k)}(Tuple(vals)) + else + error("Unknown format $(format). Expected :namedtuple or :dict.") + end +end + +""" + attribute_names(node::Node) + +Return the attribute names available for this node. +""" +attribute_names(node::Node) = collect(keys(node_attributes(node))) + +function _node_store(node::Node) + attrs = node_attributes(node) + attrs isa ColumnarAttrs || error("This operation requires a columnar attribute backend.") + store = _store_for_node_attrs(attrs) + store === nothing && error("Node is not bound to a columnar attribute store.") + return store +end + +function add_column!(node::Node, symbol::Symbol, key::Symbol, ::Type{T}; default::T) where {T} + add_column!(_node_store(node), symbol, key, T, default=default) +end + +function add_column!(node::Node, symbols::AbstractVector{Symbol}, key::Symbol, ::Type{T}; default::T) where {T} + store = _node_store(node) + for sym in symbols + add_column!(store, sym, key, T, default=default) + end + return node +end + +function drop_column!(node::Node, symbol::Symbol, key::Symbol) + drop_column!(_node_store(node), symbol, key) +end + +function drop_column!(node::Node, symbols::AbstractVector{Symbol}, key::Symbol) + store = _node_store(node) + for sym in symbols + drop_column!(store, sym, key) + end + return node +end + +function rename_column!(node::Node, symbol::Symbol, from::Symbol, to::Symbol) + rename_column!(_node_store(node), symbol, from, to) +end + +function rename_column!(node::Node, symbols::AbstractVector{Symbol}, from::Symbol, to::Symbol) + store = _node_store(node) + for sym in symbols + rename_column!(store, sym, from, to) + end + return node +end + """ get_attributes(mtg) diff --git a/test/Project.toml b/test/Project.toml index 67e71b22..f9b076ce 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -4,4 +4,5 @@ DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6" MutableNamedTuples = "af6c499f-54b4-48cc-bbd2-094bba7533c7" +Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/test/runtests.jl b/test/runtests.jl index 64b60148..c717df8d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -15,6 +15,10 @@ end include("test-nodes.jl") end +@testset "columnar backend" begin + include("test-columnar.jl") +end + @testset "descendants()" begin include("test-descendants.jl") end diff --git a/test/test-columnar.jl b/test/test-columnar.jl new file mode 100644 index 00000000..9a03993e --- /dev/null +++ b/test/test-columnar.jl @@ -0,0 +1,43 @@ +using Tables + +file = joinpath(dirname(@__FILE__), "files", "simple_plant.mtg") +mtg = read_mtg(file) + +@test node_attributes(mtg) isa MultiScaleTreeGraph.ColumnarAttrs + +leaf = traverse(mtg, node -> node, symbol=:Leaf, type=typeof(mtg))[1] + +leaf_width = attribute(leaf, :Width, default=nothing) +@test leaf_width !== nothing +@test attribute(mtg, :Width, default=nothing) === nothing + +attribute!(leaf, :new_attr, 42.0) +@test attribute(leaf, :new_attr) == 42.0 +@test :new_attr in attribute_names(leaf) + +attrs_named = attributes(leaf, format=:namedtuple) +attrs_dict = attributes(leaf, format=:dict) +@test attrs_named.Width == leaf_width +@test attrs_dict[:Width] == leaf_width + +add_column!(mtg, :Leaf, :temperature, Float64, default=20.0) +@test attribute(leaf, :temperature) == 20.0 +drop_column!(mtg, :Leaf, :temperature) +@test attribute(leaf, :temperature, default=nothing) === nothing + +add_column!(mtg, :Leaf, :tmpcol, Float64, default=1.0) +rename_column!(mtg, :Leaf, :tmpcol, :tmpcol2) +@test attribute(leaf, :tmpcol2) == 1.0 + +leaf_table = symbol_table(mtg, :Leaf) +leaf_df = DataFrame(leaf_table) +@test :node_id in Symbol.(names(leaf_df)) +@test :Width in Symbol.(names(leaf_df)) +@test nrow(leaf_df) > 0 + +all_table = mtg_table(mtg) +all_df = DataFrame(all_table) +@test :node_id in Symbol.(names(all_df)) +@test :symbol in Symbol.(names(all_df)) +@test all_df.node_id == list_nodes(mtg) +@test any(ismissing, all_df.Width) From 79e2030bb9c18ea163cad4c2c9082fa9436b32a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Vezy?= Date: Wed, 11 Feb 2026 13:50:54 +0100 Subject: [PATCH 2/7] Deprecate `type` in descendants/ancestors, it is now automatically infered --- benchmark/benchmarks.jl | 8 +- .../tutorials/7.performance_considerations.md | 6 +- src/compute_MTG/ancestors.jl | 46 ++++----- src/compute_MTG/descendants.jl | 66 +++++++------ src/compute_MTG/indexing.jl | 18 ++++ src/types/Attributes.jl | 93 +++++++++++++++++++ test/test-ancestors.jl | 15 +-- test/test-descendants.jl | 14 +-- 8 files changed, 191 insertions(+), 75 deletions(-) diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index beea4002..7de691ef 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -111,7 +111,7 @@ function ancestors_workload(nodes, reps::Int) s = 0.0 @inbounds for _ in 1:reps for n in nodes - vals = ancestors(n, :Length, recursivity_level=4, type=Union{Nothing,Float64}) + vals = ancestors(n, :Length, recursivity_level=4) for v in vals v === nothing || (s += v) end @@ -137,7 +137,7 @@ end function descendants_repeated_workload(root, reps::Int) s = 0.0 @inbounds for _ in 1:reps - vals = descendants(root, :Length, symbol=:Internode, ignore_nothing=true, type=Float64) + vals = descendants(root, :Length, symbol=:Internode, ignore_nothing=true) for v in vals s += v end @@ -194,7 +194,7 @@ function descendants_tuple_workload(root) end function descendants_mixed_one(root; ignore_nothing::Bool) - descendants(root, :Length, symbol=(:Leaf, :Internode), ignore_nothing=ignore_nothing, type=Union{Nothing,Float64}) + descendants(root, :Length, symbol=(:Leaf, :Internode), ignore_nothing=ignore_nothing) end function descendants_mixed_many(root; ignore_nothing::Bool) @@ -217,7 +217,7 @@ function build_tier!(suite, tier_name::String, n_nodes::Int) tier["traverse_update"]["multi_attr_leaf_only"] = @benchmarkable traverse_update_multi_leaf!($root) tier["traverse_update"]["multi_attr_leaf_internode"] = @benchmarkable traverse_update_multi_mixed!($root) - tier["descendants_query"]["one_attr_one_symbol"] = @benchmarkable descendants($root, :Length, symbol=:Internode, ignore_nothing=true, type=Float64) + tier["descendants_query"]["one_attr_one_symbol"] = @benchmarkable descendants($root, :Length, symbol=:Internode, ignore_nothing=true) tier["descendants_query"]["many_attr_one_symbol"] = @benchmarkable descendants_tuple_workload($root) tier["descendants_query"]["one_attr_mixed_keep_nothing"] = @benchmarkable descendants_mixed_one($root, ignore_nothing=false) tier["descendants_query"]["one_attr_mixed_ignore_nothing"] = @benchmarkable descendants_mixed_one($root, ignore_nothing=true) diff --git a/docs/src/tutorials/7.performance_considerations.md b/docs/src/tutorials/7.performance_considerations.md index afaf3afc..19491b79 100644 --- a/docs/src/tutorials/7.performance_considerations.md +++ b/docs/src/tutorials/7.performance_considerations.md @@ -66,9 +66,11 @@ For repeated queries on large trees, prefer reusing buffers with in-place method vals = Float64[] nodes = typeof(mtg)[] -descendants!(vals, mtg, :Length, type=Float64) -ancestors!(vals, get_node(mtg, 5), :Length, type=Float64) +descendants!(vals, mtg, :Length) +ancestors!(vals, get_node(mtg, 5), :Length) descendants!(nodes, mtg, self=true) ``` This pattern is especially useful when the same query is executed many times (e.g. across millions of leaves). + +`type=` for `descendants`/`ancestors` is deprecated because return eltypes are inferred automatically from typed columns. diff --git a/src/compute_MTG/ancestors.jl b/src/compute_MTG/ancestors.jl index 3c74c550..3d7e2433 100644 --- a/src/compute_MTG/ancestors.jl +++ b/src/compute_MTG/ancestors.jl @@ -26,14 +26,7 @@ is filtered out (`false`). `recursivity_level = 2`. If a negative value is provided (the default), the function returns all valid values from the node to the root. - `ignore_nothing = false`: filter-out the nodes with `nothing` values for the given `key` -- `type::Union{Union,DataType}`: The type of the attribute. Makes the function run much -faster if provided (≈4x faster). - -# Note - -In most cases, the `type` argument should be given as a union of `Nothing` and the data type -of the attribute to manage missing or inexistant data, e.g. measurements made at one scale -only. See examples for more details. +- `type::Union{Union,DataType}`: Deprecated. Return types are inferred automatically. # Examples @@ -45,15 +38,11 @@ mtg = read_mtg(file) # Using a leaf node from the mtg: leaf_node = get_node(mtg, 5) -ancestors(leaf_node, :Length) # Short to write, but slower to execute - -# Fast version, note that we pass a union of Nothing and Float64 because there are some nodes -# without a `Length` attribute: -ancestors(leaf_node, :Length, type = Union{Nothing,Float64}) +ancestors(leaf_node, :Length) # Filter by scale: -ancestors(leaf_node, :XX, scale = 1, type = Float64) -ancestors(leaf_node, :Length, scale = 3, type = Float64) +ancestors(leaf_node, :XX, scale = 1) +ancestors(leaf_node, :Length, scale = 3, ignore_nothing=true) # Filter by symbol: ancestors(leaf_node, :Length, symbol = :Internode) @@ -73,6 +62,7 @@ function ancestors( type::Union{Union,DataType}=Any) symbol = normalize_symbol_filter(symbol) link = normalize_link_filter(link) + _maybe_depwarn_traversal_type_kw(:ancestors, type) # Check the filters once, and then compute the ancestors recursively using `ancestors_` check_filters(node, scale=scale, symbol=symbol, link=link) @@ -81,12 +71,14 @@ function ancestors( filter_fun_ = filter_fun_nothing(filter_fun, ignore_nothing, key) use_no_filter = no_node_filters(scale, symbol, link, filter_fun_) - val = Array{type,1}() + out_type = type === Any ? infer_columnar_attr_type(node, Symbol(key), symbol, ignore_nothing) : type + val = Array{out_type,1}() + key_plan = build_columnar_query_plan(node, Symbol(key)) # Put the recursivity level into an array so it is mutable in-place: if self if use_no_filter || is_filtered(node, scale, symbol, link, filter_fun_) - val_ = unsafe_getindex(node, key) + val_ = unsafe_getindex(node, key, key_plan) push!(val, val_) elseif !all # We don't keep the value and we have to stop at the first filtered-out value @@ -95,15 +87,15 @@ function ancestors( end if use_no_filter - ancestors_values_no_filter!(node, key, val, recursivity_level) + ancestors_values_no_filter!(node, key, val, recursivity_level, key_plan) else - ancestors_values!(node, key, scale, symbol, link, all, filter_fun_, val, recursivity_level) + ancestors_values!(node, key, scale, symbol, link, all, filter_fun_, val, recursivity_level, key_plan) end return val end -function ancestors_values!(node, key, scale, symbol, link, all, filter_fun, val, recursivity_level) +function ancestors_values!(node, key, scale, symbol, link, all, filter_fun, val, recursivity_level, key_plan=nothing) current = node remaining = recursivity_level @@ -112,7 +104,7 @@ function ancestors_values!(node, key, scale, symbol, link, all, filter_fun, val, keep = is_filtered(parent_, scale, symbol, link, filter_fun) if keep - push!(val, unsafe_getindex(parent_, key)) + push!(val, unsafe_getindex(parent_, key, key_plan)) # Only decrement the recursivity level when the current node is not filtered-out remaining -= 1 end @@ -124,13 +116,13 @@ function ancestors_values!(node, key, scale, symbol, link, all, filter_fun, val, return val end -function ancestors_values_no_filter!(node, key, val, recursivity_level) +function ancestors_values_no_filter!(node, key, val, recursivity_level, key_plan=nothing) current = node remaining = recursivity_level while !isroot(current) && remaining != 0 parent_ = parent(current) - push!(val, unsafe_getindex(parent_, key)) + push!(val, unsafe_getindex(parent_, key, key_plan)) remaining -= 1 current = parent_ end @@ -225,23 +217,25 @@ function ancestors!( ) symbol = normalize_symbol_filter(symbol) link = normalize_link_filter(link) + _maybe_depwarn_traversal_type_kw(:ancestors!, type) check_filters(node, scale=scale, symbol=symbol, link=link) filter_fun_ = filter_fun_nothing(filter_fun, ignore_nothing, key) use_no_filter = no_node_filters(scale, symbol, link, filter_fun_) + key_plan = build_columnar_query_plan(node, Symbol(key)) empty!(out) if self if use_no_filter || is_filtered(node, scale, symbol, link, filter_fun_) - push!(out, unsafe_getindex(node, key)) + push!(out, unsafe_getindex(node, key, key_plan)) elseif !all return out end end if use_no_filter - ancestors_values_no_filter!(node, key, out, recursivity_level) + ancestors_values_no_filter!(node, key, out, recursivity_level, key_plan) else - ancestors_values!(node, key, scale, symbol, link, all, filter_fun_, out, recursivity_level) + ancestors_values!(node, key, scale, symbol, link, all, filter_fun_, out, recursivity_level, key_plan) end return out end diff --git a/src/compute_MTG/descendants.jl b/src/compute_MTG/descendants.jl index d9fa7064..66fb2c51 100644 --- a/src/compute_MTG/descendants.jl +++ b/src/compute_MTG/descendants.jl @@ -1,27 +1,38 @@ -function collect_descendant_values!(node, key, scale, symbol, link, filter_fun, all, val, recursivity_level) +@inline function _maybe_depwarn_traversal_type_kw(fname::Symbol, type) + type === Any && return nothing + Base.depwarn( + "Keyword argument `type` in `$fname` is deprecated and will be removed in a future release. " * + "Return types are inferred automatically from the columnar attribute store; remove `type` " * + "and use `ignore_nothing=true` when you want `nothing` values filtered out.", + fname, + ) + return nothing +end + +function collect_descendant_values!(node, key, scale, symbol, link, filter_fun, all, val, recursivity_level, key_plan=nothing) recursivity_level == 0 && return val recursivity_level -= 1 keep = is_filtered(node, scale, symbol, link, filter_fun) if keep - push!(val, unsafe_getindex(node, key)) + push!(val, unsafe_getindex(node, key, key_plan)) elseif !all return val end @inbounds for chnode in children(node) - collect_descendant_values!(chnode, key, scale, symbol, link, filter_fun, all, val, recursivity_level) + collect_descendant_values!(chnode, key, scale, symbol, link, filter_fun, all, val, recursivity_level, key_plan) end return val end -function collect_descendant_values_no_filter!(node, key, val, recursivity_level) +function collect_descendant_values_no_filter!(node, key, val, recursivity_level, key_plan=nothing) recursivity_level == 0 && return val recursivity_level -= 1 - push!(val, unsafe_getindex(node, key)) + push!(val, unsafe_getindex(node, key, key_plan)) @inbounds for chnode in children(node) - collect_descendant_values_no_filter!(chnode, key, val, recursivity_level) + collect_descendant_values_no_filter!(chnode, key, val, recursivity_level, key_plan) end return val end @@ -67,6 +78,7 @@ function descendants( type::Union{Union,DataType}=Any) symbol = normalize_symbol_filter(symbol) link = normalize_link_filter(link) + _maybe_depwarn_traversal_type_kw(:descendants, type) # Check the filters once, and then compute the descendants recursively using `descendants_` check_filters(node, scale=scale, symbol=symbol, link=link) @@ -74,22 +86,24 @@ function descendants( # Change the filtering function if we also want to remove nodes with nothing values: filter_fun_ = filter_fun_nothing(filter_fun, ignore_nothing, key) - val = Array{type,1}() + out_type = type === Any ? infer_columnar_attr_type(node, Symbol(key), symbol, ignore_nothing) : type + val = Array{out_type,1}() + key_plan = build_columnar_query_plan(node, Symbol(key)) use_no_filter = no_node_filters(scale, symbol, link, filter_fun_) if self if use_no_filter - collect_descendant_values_no_filter!(node, key, val, recursivity_level) + collect_descendant_values_no_filter!(node, key, val, recursivity_level, key_plan) else - collect_descendant_values!(node, key, scale, symbol, link, filter_fun_, all, val, recursivity_level) + collect_descendant_values!(node, key, scale, symbol, link, filter_fun_, all, val, recursivity_level, key_plan) end else # If we don't want to include the value of the current node, we apply the traversal to its children directly: for chnode in children(node) if use_no_filter - collect_descendant_values_no_filter!(chnode, key, val, recursivity_level) + collect_descendant_values_no_filter!(chnode, key, val, recursivity_level, key_plan) else - collect_descendant_values!(chnode, key, scale, symbol, link, filter_fun_, all, val, recursivity_level) + collect_descendant_values!(chnode, key, scale, symbol, link, filter_fun_, all, val, recursivity_level, key_plan) end end end @@ -152,23 +166,25 @@ function descendants!( ) symbol = normalize_symbol_filter(symbol) link = normalize_link_filter(link) + _maybe_depwarn_traversal_type_kw(:descendants!, type) check_filters(node, scale=scale, symbol=symbol, link=link) filter_fun_ = filter_fun_nothing(filter_fun, ignore_nothing, key) use_no_filter = no_node_filters(scale, symbol, link, filter_fun_) + key_plan = build_columnar_query_plan(node, Symbol(key)) empty!(out) if self if use_no_filter - collect_descendant_values_no_filter!(node, key, out, recursivity_level) + collect_descendant_values_no_filter!(node, key, out, recursivity_level, key_plan) else - collect_descendant_values!(node, key, scale, symbol, link, filter_fun_, all, out, recursivity_level) + collect_descendant_values!(node, key, scale, symbol, link, filter_fun_, all, out, recursivity_level, key_plan) end else for chnode in children(node) if use_no_filter - collect_descendant_values_no_filter!(chnode, key, out, recursivity_level) + collect_descendant_values_no_filter!(chnode, key, out, recursivity_level, key_plan) else - collect_descendant_values!(chnode, key, scale, symbol, link, filter_fun_, all, out, recursivity_level) + collect_descendant_values!(chnode, key, scale, symbol, link, filter_fun_, all, out, recursivity_level, key_plan) end end end @@ -224,6 +240,7 @@ function descendants!( type::Union{Union,DataType}=Any) where {N,A<:AbstractDict} symbol = normalize_symbol_filter(symbol) link = normalize_link_filter(link) + _maybe_depwarn_traversal_type_kw(:descendants!, type) # Check the filters once, and then compute the descendants recursively using `descendants_` check_filters(node, scale=scale, symbol=symbol, link=link) @@ -323,8 +340,7 @@ is filtered out (`false`). grand-children: `recursivity_level = 2`. If `Inf` (the default) or a negative value is provided, there is no recursion limitation. - `ignore_nothing = false`: filter-out the nodes with `nothing` values for the given `key` -- `type::Union{Union,DataType}`: The type of the attribute. Can make the function run much -faster if provided (*e.g.* ≈4x faster). +- `type::Union{Union,DataType}`: Deprecated. Return types are inferred automatically. # Tips @@ -332,12 +348,6 @@ faster if provided (*e.g.* ≈4x faster). To get the values of the leaves use [`isleaf`](@ref) as the filtering function, e.g.: `descendants(mtg, :Width; filter_fun = isleaf)`. -# Note - -In most cases, the `type` argument should be given as a union of `Nothing` and the data type -of the attribute to manage missing or inexistant data, e.g. measurements made at one scale -only. See examples for more details. - # Examples ```julia @@ -345,15 +355,11 @@ only. See examples for more details. file = joinpath(dirname(dirname(pathof(MultiScaleTreeGraph))),"test","files","simple_plant.mtg") mtg = read_mtg(file) -descendants(mtg, :Length) # Short to write, but slower to execute - -# Fast version, note that we pass a union of Nothing and Float64 because there are some nodes -# without a `Length` attribute: -descendants(mtg, :Length, type = Union{Nothing,Float64}) +descendants(mtg, :Length) # Filter by scale: -descendants(mtg, :XEuler, scale = 3, type = Union{Nothing, Float64}) -descendants(mtg, :Length, scale = 3, type = Float64) # No `nothing` value here, no need of a union type +descendants(mtg, :XEuler, scale = 3) +descendants(mtg, :Length, scale = 3, ignore_nothing=true) # No `nothing` in output # Filter by symbol: descendants(mtg, :Length, symbol = :Leaf) diff --git a/src/compute_MTG/indexing.jl b/src/compute_MTG/indexing.jl index 8722cb81..c932051b 100644 --- a/src/compute_MTG/indexing.jl +++ b/src/compute_MTG/indexing.jl @@ -64,6 +64,24 @@ function unsafe_getindex(node::Node{M,T} where {M<:AbstractNodeMTG,T<:AbstractDi unsafe_getindex(node, Symbol(key)) end +@inline function unsafe_getindex(node::Node{<:AbstractNodeMTG,ColumnarAttrs}, key::Symbol, plan::ColumnarQueryPlan) + attrs = node_attributes(node) + store = attrs.ref.store + store === nothing && return nothing + nodeid = node_id(node) + nodeid > length(store.node_bucket) && return nothing + bid = store.node_bucket[nodeid] + bid == 0 && return nothing + col_idx = plan.col_idx_by_bucket[bid] + col_idx == 0 && return nothing + row = store.node_row[nodeid] + col = store.buckets[bid].columns[col_idx] + return col.data[row] +end + +@inline unsafe_getindex(node::Node, key::Symbol, plan) = unsafe_getindex(node, key) +@inline unsafe_getindex(node::Node, key, plan) = unsafe_getindex(node, Symbol(key), plan) + """ Returns the length of the subtree below the node (including it) """ diff --git a/src/types/Attributes.jl b/src/types/Attributes.jl index 50dcec9f..87822ada 100644 --- a/src/types/Attributes.jl +++ b/src/types/Attributes.jl @@ -29,6 +29,11 @@ mutable struct MTGAttributeStore node_row::Vector{Int} end +struct ColumnarQueryPlan + key::Symbol + col_idx_by_bucket::Vector{Int} +end + function MTGAttributeStore() MTGAttributeStore(Dict{Symbol,Int}(), SymbolBucket[], Int[], Int[]) end @@ -242,6 +247,94 @@ function _store_for_node_attrs(attrs::ColumnarAttrs) attrs.ref.store end +@inline function _columnar_store(node) + attrs = node_attributes(node) + attrs isa ColumnarAttrs || return nothing + _store_for_node_attrs(attrs) +end + +function build_columnar_query_plan(node, key::Symbol) + store = _columnar_store(node) + store === nothing && return nothing + col_idx_by_bucket = Vector{Int}(undef, length(store.buckets)) + @inbounds for i in eachindex(store.buckets) + col_idx_by_bucket[i] = get(store.buckets[i].col_index, key, 0) + end + ColumnarQueryPlan(key, col_idx_by_bucket) +end + +@inline function _symbol_bucket_ids(store::MTGAttributeStore, symbol_filter) + if symbol_filter === nothing + return collect(eachindex(store.buckets)) + elseif symbol_filter isa Symbol + bid = get(store.symbol_to_bucket, symbol_filter, 0) + return bid == 0 ? Int[] : Int[bid] + elseif symbol_filter isa Union{Tuple,AbstractArray} + out = Int[] + for sym in symbol_filter + bid = get(store.symbol_to_bucket, Symbol(sym), 0) + bid == 0 || push!(out, bid) + end + return out + else + bid = get(store.symbol_to_bucket, Symbol(symbol_filter), 0) + return bid == 0 ? Int[] : Int[bid] + end +end + +@inline function _remove_nothing_type(T) + T === Nothing && return Union{} + parts = Base.uniontypes(T) + isempty(parts) && return T + kept = [p for p in parts if p !== Nothing] + if isempty(kept) + return Union{} + elseif length(kept) == 1 + return kept[1] + end + acc = Union{kept[1],kept[2]} + @inbounds for i in 3:length(kept) + acc = Union{acc,kept[i]} + end + return acc +end + +function infer_columnar_attr_type(node, key::Symbol, symbol_filter, ignore_nothing::Bool) + store = _columnar_store(node) + store === nothing && return Any + bucket_ids = _symbol_bucket_ids(store, symbol_filter) + isempty(bucket_ids) && return Any + + has_any = false + missing_in_some = false + T = Union{} + + @inbounds for bid in bucket_ids + bucket = store.buckets[bid] + col_idx = get(bucket.col_index, key, 0) + if col_idx == 0 + missing_in_some = true + continue + end + has_any = true + col_T = bucket.col_types[col_idx] + T = T === Union{} ? col_T : typejoin(T, col_T) + end + + has_any || return Any + + if ignore_nothing + Tnn = _remove_nothing_type(T) + return Tnn === Union{} ? Any : Tnn + end + + if missing_in_some + return Union{Nothing,T} + end + + return T +end + function init_columnar_root!(attrs::ColumnarAttrs, node_id::Int, symbol::Symbol) _isbound(attrs) && return attrs store = MTGAttributeStore() diff --git a/test/test-ancestors.jl b/test/test-ancestors.jl index b48a1941..d08ee6e0 100644 --- a/test/test-ancestors.jl +++ b/test/test-ancestors.jl @@ -5,23 +5,24 @@ # Using a leaf node from the mtg: leaf_node = get_node(mtg, 5) - @test ancestors(leaf_node, :Width; type=Union{Nothing,Float64}) == reverse(width_all[1:4]) + @test_logs (:warn, r"Keyword argument `type` in `ancestors` is deprecated") ancestors(leaf_node, :Width; type=Union{Nothing,Float64}) @test ancestors(leaf_node, :Width) == reverse(width_all[1:4]) d = ancestors(leaf_node, :Width, scale=3) - @test typeof(d) == Vector{Any} + @test typeof(d) == Vector{Union{Nothing,Float64}} @test length(d) == 1 @test d[1] == width_all[4] - d_typed = ancestors(leaf_node, :Width, type=Union{Nothing,Float64}) - @test typeof(d_typed) == Vector{Union{Nothing,Float64}} - @test ancestors(leaf_node, :Width, symbol=("Leaf", "Internode")) == width_all[[4]] + @test typeof(ancestors(leaf_node, :Width)) == Vector{Union{Nothing,Float64}} + d_symbol = ancestors(leaf_node, :Width, symbol=("Leaf", "Internode")) + @test d_symbol == width_all[[4]] + @test typeof(d_symbol) == Vector{Union{Nothing,Float64}} + @test typeof(ancestors(leaf_node, :Width, ignore_nothing=true)) == Vector{Float64} @test ancestors(leaf_node, :Width, symbol=("Leaf", "Internode"), self=true) == width_all[end:-1:end-1] buf_vals = Union{Nothing,Float64}[] - @test ancestors!(buf_vals, leaf_node, :Width; type=Union{Nothing,Float64}) == - reverse(width_all[1:4]) + @test ancestors!(buf_vals, leaf_node, :Width) == reverse(width_all[1:4]) @test ancestors!(buf_vals, leaf_node, :Width, symbol=("Leaf", "Internode"), self=true) == width_all[end:-1:end-1] diff --git a/test/test-descendants.jl b/test/test-descendants.jl index ba89049b..0b3a161c 100644 --- a/test/test-descendants.jl +++ b/test/test-descendants.jl @@ -1,16 +1,18 @@ @testset "descendants" begin mtg = read_mtg("files/simple_plant.mtg") width_all = [nothing, nothing, 0.02, 0.1, 0.02, 0.1] - @test descendants(mtg, :Width; type=Union{Nothing,Float64}) == width_all + @test_logs (:warn, r"Keyword argument `type` in `descendants` is deprecated") descendants(mtg, :Width; type=Union{Nothing,Float64}) @test descendants(mtg, :Width) == width_all d = descendants(mtg, :Width, scale=1) - @test typeof(d) == Vector{Any} + @test typeof(d) == Vector{Union{Nothing,Float64}} @test length(d) == 1 @test d[1] === width_all[1] - d_typed = descendants(mtg, :Width, type=Union{Nothing,Float64}) - @test typeof(d_typed) == Vector{Union{Nothing,Float64}} - @test descendants(mtg, :Width, symbol=(:Leaf, :Internode)) == width_all[3:end] + @test typeof(descendants(mtg, :Width)) == Vector{Union{Nothing,Float64}} + d_symbol = descendants(mtg, :Width, symbol=(:Leaf, :Internode)) + @test d_symbol == width_all[3:end] + @test typeof(d_symbol) == Vector{Union{Nothing,Float64}} + @test typeof(descendants(mtg, :Width, ignore_nothing=true)) == Vector{Float64} mtg2 = mtg[1][1][1][2] @test descendants(mtg2, :Width, symbol=:Leaf)[1] == width_all[end] @@ -46,5 +48,5 @@ end # using BenchmarkTools # @benchmark descendants($mtg, :Width) # 876 ns # @benchmark descendants!($mtg, :Width) # 5.6 μs -# @benchmark descendants($mtg, :Width; type=Union{Nothing,Float64}) # 7.6 μs +# @benchmark descendants($mtg, :Width) # inferred eltype # @benchmark descendants(mtg, :Width) From 49c18c09683e0f63e619b639da157ab4d2250e4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Vezy?= Date: Wed, 11 Feb 2026 14:21:16 +0100 Subject: [PATCH 3/7] Use DFS for `descendants` --- src/MultiScaleTreeGraph.jl | 1 + src/compute_MTG/descendants.jl | 110 +++++++++++++++++++++++---- src/types/Attributes.jl | 132 ++++++++++++++++++++++++++++++++- src/types/Node.jl | 49 +++++++++++- test/test-columnar.jl | 26 +++++++ 5 files changed, 299 insertions(+), 19 deletions(-) diff --git a/src/MultiScaleTreeGraph.jl b/src/MultiScaleTreeGraph.jl index 63c21aa4..cad7a61f 100644 --- a/src/MultiScaleTreeGraph.jl +++ b/src/MultiScaleTreeGraph.jl @@ -99,6 +99,7 @@ export ColumnarStore export Column, SymbolBucket, MTGAttributeStore, NodeAttrRef export attribute, attribute!, attributes, attribute_names export add_column!, drop_column!, rename_column! +export descendants_strategy, descendants_strategy! export columnarize! export symbol_table, mtg_table export list_nodes diff --git a/src/compute_MTG/descendants.jl b/src/compute_MTG/descendants.jl index 66fb2c51..093dfc49 100644 --- a/src/compute_MTG/descendants.jl +++ b/src/compute_MTG/descendants.jl @@ -9,6 +9,69 @@ return nothing end +@inline function _descendants_index_compatible(scale, link, all::Bool, filter_fun, recursivity_level) + scale === nothing || return false + link === nothing || return false + all || return false + filter_fun === nothing || return false + return isinf(recursivity_level) || recursivity_level < 0 +end + +@inline function _bucket_allow_mask(store::MTGAttributeStore, symbol_filter) + symbol_filter === nothing && return nothing + mask = falses(length(store.buckets)) + for bid in _symbol_bucket_ids(store, symbol_filter) + mask[bid] = true + end + return mask +end + +function _collect_descendant_values_indexed!( + out::AbstractVector, + node, + key::Symbol, + key_plan::ColumnarQueryPlan, + symbol_filter, + self::Bool, + ignore_nothing::Bool, +) + store = _columnar_store(node) + store === nothing && return false + _prepare_subtree_index!(store, get_root(node)) || return false + idx = store.subtree_index + + nid0 = node_id(node) + nid0 > length(idx.tin) && return false + left = idx.tin[nid0] + right = idx.tout[nid0] + left == 0 && return false + self || (left += 1) + left > right && return true + + allow_mask = _bucket_allow_mask(store, symbol_filter) + + @inbounds for i in left:right + nid = idx.dfs_order[i] + bid = store.node_bucket[nid] + bid == 0 && continue + allow_mask === nothing || allow_mask[bid] || continue + + col_idx = key_plan.col_idx_by_bucket[bid] + if col_idx == 0 + ignore_nothing && continue + push!(out, nothing) + continue + end + + row = store.node_row[nid] + v = store.buckets[bid].columns[col_idx].data[row] + ignore_nothing && v === nothing && continue + push!(out, v) + end + + return true +end + function collect_descendant_values!(node, key, scale, symbol, link, filter_fun, all, val, recursivity_level, key_plan=nothing) recursivity_level == 0 && return val recursivity_level -= 1 @@ -83,27 +146,34 @@ function descendants( # Check the filters once, and then compute the descendants recursively using `descendants_` check_filters(node, scale=scale, symbol=symbol, link=link) - # Change the filtering function if we also want to remove nodes with nothing values: - filter_fun_ = filter_fun_nothing(filter_fun, ignore_nothing, key) - - out_type = type === Any ? infer_columnar_attr_type(node, Symbol(key), symbol, ignore_nothing) : type + key_ = Symbol(key) + out_type = type === Any ? infer_columnar_attr_type(node, key_, symbol, ignore_nothing) : type val = Array{out_type,1}() - key_plan = build_columnar_query_plan(node, Symbol(key)) + key_plan = build_columnar_query_plan(node, key_) + + if key_plan isa ColumnarQueryPlan && + _descendants_index_compatible(scale, link, all, filter_fun, recursivity_level) && + _collect_descendant_values_indexed!(val, node, key_, key_plan, symbol, self, ignore_nothing) + return val + end + + # Change the filtering function if we also want to remove nodes with nothing values: + filter_fun_ = filter_fun_nothing(filter_fun, ignore_nothing, key_) use_no_filter = no_node_filters(scale, symbol, link, filter_fun_) if self if use_no_filter - collect_descendant_values_no_filter!(node, key, val, recursivity_level, key_plan) + collect_descendant_values_no_filter!(node, key_, val, recursivity_level, key_plan) else - collect_descendant_values!(node, key, scale, symbol, link, filter_fun_, all, val, recursivity_level, key_plan) + collect_descendant_values!(node, key_, scale, symbol, link, filter_fun_, all, val, recursivity_level, key_plan) end else # If we don't want to include the value of the current node, we apply the traversal to its children directly: for chnode in children(node) if use_no_filter - collect_descendant_values_no_filter!(chnode, key, val, recursivity_level, key_plan) + collect_descendant_values_no_filter!(chnode, key_, val, recursivity_level, key_plan) else - collect_descendant_values!(chnode, key, scale, symbol, link, filter_fun_, all, val, recursivity_level, key_plan) + collect_descendant_values!(chnode, key_, scale, symbol, link, filter_fun_, all, val, recursivity_level, key_plan) end end end @@ -168,23 +238,31 @@ function descendants!( link = normalize_link_filter(link) _maybe_depwarn_traversal_type_kw(:descendants!, type) check_filters(node, scale=scale, symbol=symbol, link=link) - filter_fun_ = filter_fun_nothing(filter_fun, ignore_nothing, key) - use_no_filter = no_node_filters(scale, symbol, link, filter_fun_) - key_plan = build_columnar_query_plan(node, Symbol(key)) + key_ = Symbol(key) + key_plan = build_columnar_query_plan(node, key_) empty!(out) + if key_plan isa ColumnarQueryPlan && + _descendants_index_compatible(scale, link, all, filter_fun, recursivity_level) && + _collect_descendant_values_indexed!(out, node, key_, key_plan, symbol, self, ignore_nothing) + return out + end + + filter_fun_ = filter_fun_nothing(filter_fun, ignore_nothing, key_) + use_no_filter = no_node_filters(scale, symbol, link, filter_fun_) + if self if use_no_filter - collect_descendant_values_no_filter!(node, key, out, recursivity_level, key_plan) + collect_descendant_values_no_filter!(node, key_, out, recursivity_level, key_plan) else - collect_descendant_values!(node, key, scale, symbol, link, filter_fun_, all, out, recursivity_level, key_plan) + collect_descendant_values!(node, key_, scale, symbol, link, filter_fun_, all, out, recursivity_level, key_plan) end else for chnode in children(node) if use_no_filter - collect_descendant_values_no_filter!(chnode, key, out, recursivity_level, key_plan) + collect_descendant_values_no_filter!(chnode, key_, out, recursivity_level, key_plan) else - collect_descendant_values!(chnode, key, scale, symbol, link, filter_fun_, all, out, recursivity_level, key_plan) + collect_descendant_values!(chnode, key_, scale, symbol, link, filter_fun_, all, out, recursivity_level, key_plan) end end end diff --git a/src/types/Attributes.jl b/src/types/Attributes.jl index 87822ada..49a4ea76 100644 --- a/src/types/Attributes.jl +++ b/src/types/Attributes.jl @@ -3,6 +3,19 @@ Marker type used by `read_mtg` to request the columnar attribute backend. """ struct ColumnarStore end +mutable struct SubtreeIndexCache + dirty::Bool + built::Bool + strategy::Symbol + query_count::Int + mutation_count::Int + tin::Vector{Int} + tout::Vector{Int} + dfs_order::Vector{Int} +end + +SubtreeIndexCache() = SubtreeIndexCache(true, false, :auto, 0, 0, Int[], Int[], Int[]) + mutable struct Column{T} name::Symbol data::Vector{T} @@ -27,6 +40,7 @@ mutable struct MTGAttributeStore buckets::Vector{SymbolBucket} node_bucket::Vector{Int} node_row::Vector{Int} + subtree_index::SubtreeIndexCache end struct ColumnarQueryPlan @@ -35,7 +49,121 @@ struct ColumnarQueryPlan end function MTGAttributeStore() - MTGAttributeStore(Dict{Symbol,Int}(), SymbolBucket[], Int[], Int[]) + MTGAttributeStore(Dict{Symbol,Int}(), SymbolBucket[], Int[], Int[], SubtreeIndexCache()) +end + +@inline function _validate_descendants_strategy(strategy::Symbol) + strategy in (:auto, :pointer, :indexed) && return strategy + error("Unknown descendants strategy $(strategy). Expected :auto, :pointer, or :indexed.") +end + +@inline descendants_strategy(store::MTGAttributeStore) = store.subtree_index.strategy + +function descendants_strategy!(store::MTGAttributeStore, strategy::Symbol) + store.subtree_index.strategy = _validate_descendants_strategy(strategy) + return store +end + +@inline function _mark_subtree_index_mutation!(store::MTGAttributeStore) + idx = store.subtree_index + idx.dirty = true + if idx.built + idx.mutation_count += 1 + end + return nothing +end + +@inline function _can_use_index_without_rebuild(store::MTGAttributeStore) + idx = store.subtree_index + idx.built && !idx.dirty +end + +@inline function _descendants_should_rebuild_auto(idx::SubtreeIndexCache) + # Keep pointer traversal for mutation-heavy periods; switch when queries dominate. + idx.query_count >= max(8, 4 * idx.mutation_count) +end + +function _rebuild_subtree_index!(store::MTGAttributeStore, root) + nmax = length(store.node_bucket) + tin = Vector{Int}(undef, nmax) + tout = Vector{Int}(undef, nmax) + @inbounds for i in 1:nmax + tin[i] = 0 + tout[i] = 0 + end + + nactive = 0 + @inbounds for i in 1:nmax + nactive += store.node_bucket[i] == 0 ? 0 : 1 + end + dfs_order = Int[] + sizehint!(dfs_order, nactive) + + stack_nodes = Vector{typeof(root)}(undef, 1) + stack_pos = Vector{Int}(undef, 1) + stack_nodes[1] = root + stack_pos[1] = 0 + t = 0 + + while !isempty(stack_nodes) + current = stack_nodes[end] + pos = stack_pos[end] + + if pos == 0 + nid = node_id(current) + t += 1 + tin[nid] = t + push!(dfs_order, nid) + stack_pos[end] = 1 + pos = 1 + end + + ch = children(current) + if pos <= length(ch) + stack_pos[end] = pos + 1 + push!(stack_nodes, ch[pos]) + push!(stack_pos, 0) + else + tout[node_id(current)] = t + pop!(stack_nodes) + pop!(stack_pos) + end + end + + idx = store.subtree_index + idx.tin = tin + idx.tout = tout + idx.dfs_order = dfs_order + idx.dirty = false + idx.built = true + idx.query_count = 0 + idx.mutation_count = 0 + return idx +end + +function _prepare_subtree_index!(store::MTGAttributeStore, root) + idx = store.subtree_index + strategy = idx.strategy + + if strategy === :pointer + return false + elseif strategy === :indexed + _can_use_index_without_rebuild(store) || _rebuild_subtree_index!(store, root) + return true + end + + # :auto + if _can_use_index_without_rebuild(store) + return true + end + + idx.query_count += 1 + if _descendants_should_rebuild_auto(idx) + _rebuild_subtree_index!(store, root) + return true + end + + return false end mutable struct NodeAttrRef @@ -163,6 +291,7 @@ function _bucket_row(ref::NodeAttrRef) end function _add_node_with_attrs!(store::MTGAttributeStore, node_id::Int, symbol::Symbol, attrs::AbstractDict) + _mark_subtree_index_mutation!(store) _ensure_node_capacity!(store, node_id) bid = _get_or_create_bucket!(store, symbol) bucket = store.buckets[bid] @@ -187,6 +316,7 @@ function _add_node_with_attrs!(store::MTGAttributeStore, node_id::Int, symbol::S end function _remove_node!(store::MTGAttributeStore, node_id::Int) + _mark_subtree_index_mutation!(store) node_id > length(store.node_bucket) && return nothing bid = store.node_bucket[node_id] bid == 0 && return nothing diff --git a/src/types/Node.jl b/src/types/Node.jl index 7f20e873..c834f3eb 100644 --- a/src/types/Node.jl +++ b/src/types/Node.jl @@ -220,14 +220,23 @@ AbstractTrees.childtype(::Type{Node{T,A}}) where {T,A} = Node{T,A} Set the parent of the node. """ -reparent!(node::N, p::N2) where {N<:Node{T,A},N2<:Union{Nothing,Node{T,A}}} where {T,A} = setfield!(node, :parent, p) +function reparent!(node::N, p::N2) where {N<:Node{T,A},N2<:Union{Nothing,Node{T,A}}} where {T,A} + setfield!(node, :parent, p) + _mark_structure_mutation!(node) + p === nothing || _mark_structure_mutation!(p) + return p +end """ rechildren!(node::Node{T,A}, chnodes::Vector{Node{T,A}}) where {T,A} Set the children of the node. """ -rechildren!(node::Node{T,A}, chnodes::Vector{Node{T,A}}) where {T,A} = setfield!(node, :children, chnodes) +function rechildren!(node::Node{T,A}, chnodes::Vector{Node{T,A}}) where {T,A} + setfield!(node, :children, chnodes) + _mark_structure_mutation!(node) + return chnodes +end # AbstractTrees.childstatetype(::Type{Node{T,A}}) where {T,A} = Node{T,A} # Set the traits for Node: @@ -444,6 +453,15 @@ function _node_store(node::Node) return store end +@inline function _mark_structure_mutation!(node::Node) + attrs = node_attributes(node) + attrs isa ColumnarAttrs || return nothing + store = _store_for_node_attrs(attrs) + store === nothing && return nothing + _mark_subtree_index_mutation!(store) + return nothing +end + function add_column!(node::Node, symbol::Symbol, key::Symbol, ::Type{T}; default::T) where {T} add_column!(_node_store(node), symbol, key, T, default=default) end @@ -480,6 +498,33 @@ function rename_column!(node::Node, symbols::AbstractVector{Symbol}, from::Symbo return node end +""" + descendants_strategy(node::Node) + descendants_strategy!(node::Node, strategy::Symbol) + +Get or set how `descendants(node, key, ...)` is computed for columnar MTGs. + +- `:auto` (default): choose automatically based on workload. +- `:pointer`: always follow parent/children links directly in the graph. +- `:indexed`: use a precomputed index for descendant lookups. + +The index is based on a Depth-First Search (DFS) visit order (visit a branch deeply, then the +next branch). It can speed up repeated descendant requests on mostly stable trees, while +`:pointer` is often better when the tree structure changes very frequently. +""" +function descendants_strategy(node::Node) + attrs = node_attributes(node) + attrs isa ColumnarAttrs || return :pointer + store = _store_for_node_attrs(attrs) + store === nothing && return :pointer + return descendants_strategy(store) +end + +function descendants_strategy!(node::Node, strategy::Symbol) + descendants_strategy!(_node_store(node), strategy) + return node +end + """ get_attributes(mtg) diff --git a/test/test-columnar.jl b/test/test-columnar.jl index 9a03993e..7add5270 100644 --- a/test/test-columnar.jl +++ b/test/test-columnar.jl @@ -41,3 +41,29 @@ all_df = DataFrame(all_table) @test :symbol in Symbol.(names(all_df)) @test all_df.node_id == list_nodes(mtg) @test any(ismissing, all_df.Width) + +# Hybrid descendants traversal strategy. +@test descendants_strategy(mtg) == :auto +descendants_strategy!(mtg, :indexed) +@test descendants_strategy(mtg) == :indexed + +store = MultiScaleTreeGraph._node_store(mtg) +@test store.subtree_index.dirty +leaf_widths_before = descendants(mtg, :Width, symbol=:Leaf, ignore_nothing=true) +@test !store.subtree_index.dirty +@test store.subtree_index.built + +insert_child!( + mtg[1], + MutableNodeMTG(:+, :Leaf, 0, 3), + _ -> Dict{Symbol,Any}(:Width => 9.99, :Area => 0.01, :mass => 0.1), +) +@test store.subtree_index.dirty +leaf_widths_after = descendants(mtg, :Width, symbol=:Leaf, ignore_nothing=true) +@test !store.subtree_index.dirty +@test length(leaf_widths_after) == length(leaf_widths_before) + 1 +@test leaf_widths_after[end] == 9.99 + +descendants_strategy!(mtg, :pointer) +@test descendants_strategy(mtg) == :pointer +@test descendants(mtg, :Width, symbol=:Leaf, ignore_nothing=true) == leaf_widths_after From 023f675beb3d1c2000c7c060cb1ec7497963b6f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Vezy?= Date: Wed, 11 Feb 2026 14:21:45 +0100 Subject: [PATCH 4/7] Add info about performance in `descendants` + light review of doc --- docs/src/api.md | 1 + docs/src/index.md | 2 +- docs/src/the_mtg/mtg_concept.md | 4 +- docs/src/the_mtg/our_implementation.md | 31 ++++++++------ docs/src/tutorials/1.manipulate_node.md | 4 +- .../2.descendants_ancestors_filters.md | 2 +- docs/src/tutorials/3.transform_mtg.md | 10 +++-- docs/src/tutorials/6.add_remove_nodes.md | 9 ++-- .../tutorials/7.performance_considerations.md | 41 ++++++++++++++++--- 9 files changed, 73 insertions(+), 31 deletions(-) diff --git a/docs/src/api.md b/docs/src/api.md index a13a9b5e..71a03266 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -6,6 +6,7 @@ Notable attribute APIs: - `attribute`, `attribute!`, `attributes`, `attribute_names` - `add_column!`, `drop_column!`, `rename_column!` +- `descendants_strategy`, `descendants_strategy!` (choose automatic/direct/indexed descendants retrieval) - `symbol_table`, `mtg_table` (`Tables.jl` sources) ```@index diff --git a/docs/src/index.md b/docs/src/index.md index 46f7de11..066844e4 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -17,7 +17,7 @@ Documentation for [MultiScaleTreeGraph.jl](https://github.com/VEZY/MultiScaleTre The goal of MultiScaleTreeGraph.jl is to read, write, analyse and plot MTG (Multi-scale Tree Graph) files. -The Multi-Scale Tree Graph, or MTG, is a data structure used to encode a plant to describe its topology (*i.e.* structure) and any attributes (*e.g.* geometry, colours, state...). It was developed in the [AMAP lab](https://amap.cirad.fr/) in the 90's to cope with the need of a generic yet scalable structure for plant topology and traits measurement, analysis and modelling. +The Multi-Scale Tree Graph (MTG) is a format used to describe plant structure (topology) and associated attributes (*e.g.* geometry, colours, state). It was developed in the [AMAP lab](https://amap.cirad.fr/) in the 90's to provide a generic and scalable way to represent plant topology and measurements. The format is described in details in the original paper from Godin et Caraglio (1998). diff --git a/docs/src/the_mtg/mtg_concept.md b/docs/src/the_mtg/mtg_concept.md index b4f1da64..4f6765e3 100644 --- a/docs/src/the_mtg/mtg_concept.md +++ b/docs/src/the_mtg/mtg_concept.md @@ -2,7 +2,7 @@ ## Introduction -The Multi-scale Tree Graph -or MTG for short- is a data structure for describing a tree-alike object at one or several scales. +The Multi-scale Tree Graph (MTG) is a format for describing a plant-like branching object at one or several scales. Let's take a grass plant as an example: @@ -20,7 +20,7 @@ If we get closer again, each axis can be described with more details, and we can *Fig. 2. A grass plant described at three different scales: (a) the plant, (b) the axis and (c) the phytomer. The corresponding MTG graph (mono-scale) is shown on the right of each description.* -Figure 2 shows a different graph for each scale used for the description, but we can join all scales into a single MTG instead. In the MTG, all scales live together in the same data structure. The elementary object is called a node. The nodes are denoted by the little circles in Figure 2. There is one node for the plant scale (Fig. 2a), two nodes for the axis because there are two tillers (Fig. 2b), and six nodes for the phytomers, one for each (Fig. 2c). +Figure 2 shows a different graph for each scale, but we can also merge all scales into one MTG. In an MTG, all scales live together in one structure. The basic element is a **node** (the circles in Figure 2). There is one node for the plant scale (Fig. 2a), two nodes for axes because there are two tillers (Fig. 2b), and six nodes for phytomers, one for each (Fig. 2c). The resulting MTG describing all scales at once in the same graph can be represented as follows: diff --git a/docs/src/the_mtg/our_implementation.md b/docs/src/the_mtg/our_implementation.md index 52b2e2e9..e4111938 100644 --- a/docs/src/the_mtg/our_implementation.md +++ b/docs/src/the_mtg/our_implementation.md @@ -9,9 +9,12 @@ mtg = read_mtg(file) node_6 = get_node(mtg, 6) ``` -In this package, the MTG is represented as a [tree data structure](https://en.wikipedia.org/wiki/Tree_%28data_structure%29). +In this package, the MTG is represented as a tree (nodes linked by parent/children relationships). -The tree is built from a series of nodes with different fields that describe the topology (*i.e.* how nodes are connected together) and the attributes of the node. +The tree is built from nodes. Each node stores: + +- how it is connected to other nodes (its topology) +- its own measured or computed attributes !!! note The package use terms from computer science rather than plant biology. So we use words such as "root" in an MTG, which is not the plant root, but the first node in the tree, *i.e.* the one without any parent. Similarly a leaf node is not a leaf from a plant but a node without any children. @@ -24,37 +27,39 @@ The nodes have their own data type called [`Node`](@ref). A [`Node`](@ref) has s fieldnames(Node) ``` -Here is a little description of each field: +Here is a simple description of each field: - `id`: The unique integer identifier of the node. It can be set by the user but is usually set automatically. - `parent`: The parent node of the curent node. If the curent node is the root node, it will return `nothing`. You can test whether a node is a root node sing the [`isroot`](@ref) function. -- children: a dictionary of the children nodes with their `id` as key, or `nothing` if none; +- `children`: the child nodes. - `MTG`: The MTG encoding of the node (see below, or [`NodeMTG`](@ref)) -- `attributes`: the node attributes. Usually a `NamedTuple`, a `MutableNamedTuple` or a `Dict` or similar (e.g. `OrderedDict`), but the type is optional. The choice of the data structure depends mainly on how much you plan to change the attributes and their values. Attributes include for example the length or diameter of a node, its colour, 3d position... -- `traversal_cache`: a cache for the traversal, used by *e.g.* [`traverse`](@ref) to traverse more efficiently particular nodes in the MTG +- `attributes`: node values (for example length, diameter, color, 3D position). +- `traversal_cache`: saved traversal results used to speed up repeated operations. -The value of tee fields are accessed using accessor functions: [`node_id`](@ref), [`parent`](@ref), [`children`](@ref), [`node_mtg`](@ref), [`node_attributes`](@ref), and the last one `get_traversal_cache` which is not exported because users shouldn't use it directly. +The values of these fields are accessed with helper functions such as [`node_id`](@ref), [`parent`](@ref), [`children`](@ref), [`node_mtg`](@ref), and [`node_attributes`](@ref). -The MTG field of a node describes the topology encoding of the node: its type of link with its parent (decompose: `/`, follow: `<`, and branch: `+`), its symbol, index, and scale (see [Node MTG and attributes](@ref) and [The MTG section](@ref) for more details). The MTG field must be encoded in a data structure called [`NodeMTG`](@ref) or in a [`MutableNodeMTG`](@ref). They have four fields corresponding to the topology encoding: +The MTG field of a node describes how the node is positioned in the graph: link with parent (`/`, `<`, `+`), symbol, index, and scale (see [Node MTG and attributes](@ref) and [The MTG section](@ref) for more details). It is stored as [`NodeMTG`](@ref) or [`MutableNodeMTG`](@ref). These types have four fields: ```@example usepkg fieldnames(NodeMTG) ``` -Creating a [`NodeMTG`](@ref) is very simple, just pass the arguments by position. For example if we have an Axis that decomposes its parent node ("/"), with an index 0 and a scale of 1, we would declare it as follows: +Creating a [`NodeMTG`](@ref) is simple: pass the four values in order. For example, an Axis that decomposes its parent (`"/"`), with index `0` and scale `1`: ```@example usepkg axis_mtg_encoding = NodeMTG("/", "Axis", 0, 1) ``` -The we can access the data using the dot syntax: +Then we can access data using dot syntax: ```@example usepkg axis_mtg_encoding.symbol ``` !!! note - [`NodeMTG`](@ref) is the immutable data type, meaning that information cannot be changed once read. By default the package the mutable equivalent called [`MutableNodeMTG`](@ref). Accessing the information of a mutable data structure is slower, but it is more convenient if we need to change its values. + [`NodeMTG`](@ref) is immutable (cannot be changed after creation). + [`MutableNodeMTG`](@ref) can be changed. + Use mutable if you plan to edit node topology fields. ## Learning by example @@ -79,7 +84,7 @@ typeof(mtg) ``` !!! note - The [`Node`](@ref) is a parametric type, that's why `typeof(mtg)` also returns the type used for the MTG data in the node (`MutableNodeMTG`) and the type used for the attributes (`Dict{Symbol, Any}`). But this is not important here. + `typeof(mtg)` shows extra type details (including MTG encoding type and attribute container type). You usually do not need to worry about these details to use the package. We can access the fields of the node using the accessor functions: @@ -122,4 +127,4 @@ index(mtg) ```@example usepkg scale(mtg) -``` \ No newline at end of file +``` diff --git a/docs/src/tutorials/1.manipulate_node.md b/docs/src/tutorials/1.manipulate_node.md index 0bf81457..9901444a 100644 --- a/docs/src/tutorials/1.manipulate_node.md +++ b/docs/src/tutorials/1.manipulate_node.md @@ -29,13 +29,13 @@ typeof(mtg) This node is the root node of the MTG, meaning the first node of the MTG, the one without any parent. !!! note - Root node and leaf node mean a node without any parent or children respectively. These terms are used in the sense of a [tree data structure](https://en.wikipedia.org/wiki/Tree_(data_structure)). + Root node and leaf node mean a node without any parent or children respectively. You can think of this like a family tree: the root is the first ancestor, and leaves are the endpoints. ## The Node type ### What's a node? -As we learned in the previous section, the node is used as the elementary object to build the MTG. In this package, a node is a data structure used to hold these informations (*i.e.* fields). See the [MTG implementation](@ref) section for more details. +As we learned in the previous section, the node is the basic building block of an MTG. In practice, it is just an object that stores information in named fields. See the [MTG implementation](@ref) section for more details. ### Access the fields of a node diff --git a/docs/src/tutorials/2.descendants_ancestors_filters.md b/docs/src/tutorials/2.descendants_ancestors_filters.md index 64e54fc9..1420f33f 100644 --- a/docs/src/tutorials/2.descendants_ancestors_filters.md +++ b/docs/src/tutorials/2.descendants_ancestors_filters.md @@ -98,7 +98,7 @@ For example one could be interested in computing the total length of all nodes i descendants(mtg, :Length) ``` -The `descendants` function visits every children nodes recursively until finding a leaf node. It returns the values in the same order than the visited nodes. +The `descendants` function visits each child, then each child of each child, and so on until leaves. It returns values in the same order as nodes are visited. The function can also help get the nodes directly if we don't pass any attribute: diff --git a/docs/src/tutorials/3.transform_mtg.md b/docs/src/tutorials/3.transform_mtg.md index 2b573c23..d42de4d8 100644 --- a/docs/src/tutorials/3.transform_mtg.md +++ b/docs/src/tutorials/3.transform_mtg.md @@ -40,12 +40,14 @@ Here is a summary of the different forms you can use: 1. a `:var_name => :new_var_name` pair. This form is used to rename an attribute name 2. a `:var_name => function => :new_var_name` or `[:var_name1, :var_name2...] => function => :new_var_name`. The variables are declared as a `Symbol` or a `String` (or a vector of), and they are passed as positional arguments to `function`. The new variable name is optional, and is automatically generated if not provided by concatenating the source column name(s) and the function name if any, this form would be used as: `:var_name => function`. 3. a `function => :new_var_name` form that applies a function to a node and puts the results in a new attribute. This form is usually applied when searching ancestors or descendants values. -4. a `function` form that applies a mutating function to a node, without expecting any output. This form is used when using a function that already mutates the node, without the need to return anything, *e.g.* [`branching_order!`](@ref). +4. a `function` form that applies a function that directly changes the node, without expecting any output. This form is used when your function already updates the node by itself, *e.g.* [`branching_order!`](@ref). This tutorial is a deep dive into these different forms. !!! note - All examples use the mutating version [`transform!`](@ref), but there is a non-mutating version too ([`transform`](@ref)). It is used likewise but returns a modified copy of the `mtg`, which is a little bit slower. + All examples use [`transform!`](@ref), which updates the same MTG object in place. + The version without `!` ([`transform`](@ref)) returns a new modified copy instead. + The copy version is usually a little slower, but it keeps your original MTG unchanged. ## Form 1: Rename an attribute @@ -69,7 +71,7 @@ print(get_attributes(mtg)) Yes it did! -The equivalent call with the non-mutating version of transform is: +The equivalent call with the copy-returning version (`transform`, without `!`) is: ```@example usepkg new_mtg = transform(mtg, :Width => :width) @@ -242,7 +244,7 @@ select!(mtg_select, :Length => (x -> x / 10) => :length_m, :Width, ignore_nothin DataFrame(mtg_select) ``` -There is also a non-mutating version of the function: +There is also a copy-returning version of the function (`select`, without `!`): ```@example usepkg mtg_select = select(mtg, :Length => (x -> x / 10) => :length_m, :Width, ignore_nothing = true) diff --git a/docs/src/tutorials/6.add_remove_nodes.md b/docs/src/tutorials/6.add_remove_nodes.md index c8d1ae5b..cadfa5b2 100644 --- a/docs/src/tutorials/6.add_remove_nodes.md +++ b/docs/src/tutorials/6.add_remove_nodes.md @@ -16,7 +16,7 @@ Create the root node: mtg = Node(MutableNodeMTG("/", "Plant", 0, 1), Dict{Symbol,Any}(:species => "Grassy-plant")) ``` -The first argument to [`Node`](@ref) is its MTG encoding, which describes the topology of the node: what kind of link it has with its parent, its scale, its index and its symbol. It is given as a [`MutableNodeMTG`](@ref) (or a [`NodeMTG`](@ref)). The second argument is used to add attributes to the MTG. +The first argument to [`Node`](@ref) is its MTG description: link with parent, scale, index, and symbol. It is given as a [`MutableNodeMTG`](@ref) (or a [`NodeMTG`](@ref)). The second argument contains node attributes. ### Node id @@ -77,7 +77,7 @@ DataFrame(mtg, get_attributes(mtg)) #### Insertion functions -Adding nodes recursively is easy, but sometimes we want to insert nodes in-between other nodes. We can still use `Node` to do so, but it becomes a bit cumbersome because you'll have to handle manually the changes in parents, children and siblings. +Adding nodes step by step is easy, but sometimes we want to insert nodes in-between other nodes. We can still use `Node` to do so, but it becomes a bit cumbersome because you'll have to handle manually the changes in parents, children and siblings. We provide some helper functions that does it for you instead: @@ -239,7 +239,10 @@ delete_nodes!(mtg_3, filter_fun = node -> node_mtg(node).index >= 1) ``` !!! note - [`delete_nodes!`](@ref) always return the root node of the MTG. If the root node of the original MTG is deleted, its child becomes the new root node. If the root had several children, it returns an error. The function always return the root node of the new MTG, so if the root has not been modified, it remains the same, but if it has been deleted, the new root is returned. That is why it is preferable to use [`delete_nodes!`](@ref) has a non-mutating function and re-assign the results to an object if it is planned to remove the root node. + [`delete_nodes!`](@ref) always returns the root node of the resulting MTG. + If the old root was deleted and had one child, that child becomes the new root. + If the old root had several children, the function returns an error. + In practice, re-assign the result to keep the correct root reference after deletions. As for [`delete_node!`](@ref) (singular), by default [`delete_nodes!`](@ref) (plural) uses [`new_child_link`](@ref) to re-link the children of the deleted nodes, but the user can provide a custom function. See the function details to learn more about it. diff --git a/docs/src/tutorials/7.performance_considerations.md b/docs/src/tutorials/7.performance_considerations.md index 19491b79..ad6c209b 100644 --- a/docs/src/tutorials/7.performance_considerations.md +++ b/docs/src/tutorials/7.performance_considerations.md @@ -8,6 +8,16 @@ It is important to note that MultiScaleTreeGraph.jl is high-performance by desig ## Performance Tips +### Vocabulary + +This page uses a few technical words. Here is what they mean in MTG terms: + +- **Traversal**: visiting nodes one after another in the graph. +- **Request**: asking the MTG for data, for example `descendants(mtg, :Length)` or `ancestors(node, :Width)`. +- **Direct traversal**: reading the graph by following parent/children links directly. +- **Indexed traversal**: using a precomputed lookup table to answer some descendant requests faster. +- **DFS (Depth-First Search)**: one way to visit nodes; it goes deep into one branch before moving to the next branch. + ### Attribute backend By default, `read_mtg` uses `ColumnarStore`, a typed per-symbol columnar backend. This is optimized for traversal + attribute extraction workloads and reduces repeated dictionary lookups in hot loops. @@ -36,12 +46,12 @@ traverse(mtg, x -> x, symbol=:Internode, link=:<) ### Traversal: node caching -MultiScaleTreeGraph.jl traverses all nodes by default when performing tree traversal. The traversal is done in a recursive manner so it is performant, but not always as fast as it could be. For example, we could have a very large tree with only two leaves at the top. In this case, we would traverse all nodes in the tree, even though we only need to traverse two nodes. +MultiScaleTreeGraph.jl visits all nodes by default when traversing a tree. This is usually fast, but not always optimal. For example, in a very large tree with only two leaves of interest, we still visit every node even though we only need those two leaves. To improve performance, it is possible to cache any type of `traversal`, including any kind of filter on the nodes, and then use the cached traversal instead of the default one. This will improve performance significantly. !!! note - A cache is a data structure that stores the result of a computation so that it can be reused later. In this case, the cache stores a pointer to the nodes from the traversal so that it can be efficiently reused later. This is a common technique to improve performance at the cost of memory, though the memory cost is usually very small. + A cache is simply a saved result that can be reused later. Here, it saves references to the nodes selected by a traversal so the package does not need to re-scan the full tree each time. This usually improves speed at the cost of a small amount of extra memory. To cache a traversal, you can use [`cache_nodes!`](@ref). For example, if you want to cache all the **leaf** nodes in the MTG, you can do: @@ -56,11 +66,11 @@ This will cache all the nodes with the symbol `:Leaf` in the MTG. Then, the tree ### Traversal: descendants values caching -Similarly to caching nodes during tree traversal, the mutating version of [`descendants`](@ref) -[`descendants!`](@ref)- provides a way to cache the values from the descendants of a node. This is useful when the descendants of a node are needed multiple times, as it avoids traversing the tree multiple times. For example, this is useful when computing the total biomass of all wood each segment supports in a tree, as the biomass of a node is the sum of the biomass of its descendants. +Similarly to caching nodes during tree traversal, [`descendants!`](@ref) (the `!` means "update in place") provides a way to cache values from descendants of a node. This is useful when these values are needed many times, because it avoids traversing the tree repeatedly. For example, this is useful when computing total biomass supported by each segment. ### In-place traversal outputs -For repeated queries on large trees, prefer reusing buffers with in-place methods to reduce allocations: +For repeated requests on large trees, prefer reusing buffers with in-place methods to reduce allocations: ```julia vals = Float64[] @@ -71,6 +81,27 @@ ancestors!(vals, get_node(mtg, 5), :Length) descendants!(nodes, mtg, self=true) ``` -This pattern is especially useful when the same query is executed many times (e.g. across millions of leaves). +This pattern is especially useful when the same request is executed many times (e.g. across millions of leaves). `type=` for `descendants`/`ancestors` is deprecated because return eltypes are inferred automatically from typed columns. + +### Hybrid descendants backend (growth-safe) + +For `descendants(node, key, ...)` on columnar MTGs, the package supports: + +- `:auto` (default): use direct graph traversal when the MTG is changing a lot, and switch to indexed traversal when read requests dominate +- `:pointer`: always use direct graph traversal +- `:indexed`: always use the index (rebuilding it when structure changed) + +In other words: + +- If your simulation is currently **growing a lot** (many inserted/deleted organs), `:pointer` behavior is usually best. +- If your simulation is mostly **reading values repeatedly** from a mostly stable structure, `:indexed` behavior can be faster. +- `:auto` tries to choose between both automatically. + +```julia +descendants_strategy(mtg) # :auto +descendants_strategy!(mtg, :pointer) +descendants_strategy!(mtg, :indexed) +descendants_strategy!(mtg, :auto) +``` From cea7b853652d39c53e9960a1cfb7a5f24e846c75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Vezy?= Date: Wed, 11 Feb 2026 15:31:50 +0100 Subject: [PATCH 5/7] Remove back-compatibility with other types of attributes, now its just ColumnarAttrs --- benchmark/benchmarks.jl | 45 +++++-- .../tutorials/7.performance_considerations.md | 2 +- src/compute_MTG/caching.jl | 2 +- src/read_MTG/parse_mtg.jl | 59 ++------- src/read_MTG/read_MTG.jl | 26 ++-- src/types/Node.jl | 64 ++-------- test/runtests.jl | 2 +- test/test-caching.jl | 4 +- test/test-conversion.jl | 4 +- test/test-insert_node.jl | 3 +- test/test-mutation.jl | 36 +----- test/test-nodes.jl | 8 +- test/test-read_mtg.jl | 113 +++--------------- test/test-transform.jl | 2 +- test/test-traverse.jl | 6 +- test/test-write_mtg.jl | 8 +- 16 files changed, 106 insertions(+), 278 deletions(-) diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index 7de691ef..54865c64 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -9,6 +9,8 @@ using Random using Tables const SUITE = BenchmarkGroup() +const HAS_EXPLICIT_ATTRIBUTE_API = isdefined(MultiScaleTreeGraph, :attribute) && isdefined(MultiScaleTreeGraph, :attribute!) +const HAS_TABLE_VIEWS_API = isdefined(MultiScaleTreeGraph, :symbol_table) && isdefined(MultiScaleTreeGraph, :mtg_table) const SIZE_TIERS = ( small=10_000, @@ -22,11 +24,11 @@ function synthetic_mtg(; n_nodes::Int=10_000, seed::Int=42) root = Node( 1, MutableNodeMTG(:/, :Plant, 1, 1), - MultiScaleTreeGraph.ColumnarAttrs(Dict{Symbol,Any}( + Dict{Symbol,Any}( :mass => rand(rng), :height => rand(rng), :temperature => 20.0, - )), + ), ) candidates = Node[root] @@ -86,6 +88,21 @@ function synthetic_mtg(; n_nodes::Int=10_000, seed::Int=42) return (root=root, leaves=leaves, internodes=internodes, all_nodes=all_nodes, sample_nodes=sample_nodes, sample_leaves=sample_leaves) end +@inline function _attr_get(node, key::Symbol; default=nothing) + if HAS_EXPLICIT_ATTRIBUTE_API + return attribute(node, key, default=default) + end + return get(node_attributes(node), key, default) +end + +@inline function _attr_set!(node, key::Symbol, value) + if HAS_EXPLICIT_ATTRIBUTE_API + return attribute!(node, key, value) + end + node[key] = value + return value +end + function children_workload(nodes, reps::Int) s = 0 @inbounds for _ in 1:reps @@ -159,27 +176,27 @@ end function traverse_update_one!(root) traverse!(root) do node - m = attribute(node, :mass, default=0.0) - attribute!(node, :mass, m + 0.1) + m = _attr_get(node, :mass, default=0.0) + _attr_set!(node, :mass, m + 0.1) end return nothing end function traverse_update_multi_leaf!(root) traverse!(root, symbol=:Leaf) do node - width = attribute(node, :Width, default=0.0) - area = attribute(node, :Area, default=0.0) - attribute!(node, :Width, width * 1.001) - attribute!(node, :Area, area + width) + width = _attr_get(node, :Width, default=0.0) + area = _attr_get(node, :Area, default=0.0) + _attr_set!(node, :Width, width * 1.001) + _attr_set!(node, :Area, area + width) end return nothing end function traverse_update_multi_mixed!(root) traverse!(root, symbol=(:Leaf, :Internode)) do node - m = attribute(node, :mass, default=0.0) - attribute!(node, :mass, m * 0.999 + 0.0001) - attribute!(node, :update_counter, attribute(node, :update_counter, default=0) + 1) + m = _attr_get(node, :mass, default=0.0) + _attr_set!(node, :mass, m * 0.999 + 0.0001) + _attr_set!(node, :update_counter, _attr_get(node, :update_counter, default=0) + 1) end return nothing end @@ -268,8 +285,10 @@ function build_tier!(suite, tier_name::String, n_nodes::Int) select!(mtg_, :mass, :Length, ignore_nothing=true) end - tier["api_surface_small_only"]["tables_symbol"] = @benchmarkable symbol_table($root, :Leaf) - tier["api_surface_small_only"]["tables_unified"] = @benchmarkable mtg_table($root) + if HAS_TABLE_VIEWS_API + tier["api_surface_small_only"]["tables_symbol"] = @benchmarkable symbol_table($root, :Leaf) + tier["api_surface_small_only"]["tables_unified"] = @benchmarkable mtg_table($root) + end tier["api_surface_small_only"]["write_mtg"] = @benchmarkable begin data_ = synthetic_mtg(n_nodes=3_000, seed=666) diff --git a/docs/src/tutorials/7.performance_considerations.md b/docs/src/tutorials/7.performance_considerations.md index ad6c209b..05b2bbe4 100644 --- a/docs/src/tutorials/7.performance_considerations.md +++ b/docs/src/tutorials/7.performance_considerations.md @@ -29,7 +29,7 @@ The explicit attribute API is: - `attributes(node, format=:namedtuple | :dict)` - `add_column!`, `drop_column!`, `rename_column!` for schema updates -Legacy containers (`Dict`, `NamedTuple`, `MutableNamedTuple`) are still available by passing `attr_type` explicitly to `read_mtg`. +`read_mtg` always uses the typed columnar backend. If you build nodes manually, you can still pass `Dict`/`NamedTuple` values, and they are converted automatically to columnar attributes. ### MTG encoding diff --git a/src/compute_MTG/caching.jl b/src/compute_MTG/caching.jl index 88b92789..a0085b76 100644 --- a/src/compute_MTG/caching.jl +++ b/src/compute_MTG/caching.jl @@ -41,7 +41,7 @@ when traversing using [`traverse!`](@ref) or [`transform!`](@ref). ```julia file = joinpath(dirname(dirname(pathof(MultiScaleTreeGraph))), "test", "files", "simple_plant.mtg") -mtg = read_mtg(file, Dict) +mtg = read_mtg(file) # Cache all leaf nodes: cache_nodes!(mtg, symbol=:Leaf) diff --git a/src/read_MTG/parse_mtg.jl b/src/read_MTG/parse_mtg.jl index 66eab50d..c7239b41 100644 --- a/src/read_MTG/parse_mtg.jl +++ b/src/read_MTG/parse_mtg.jl @@ -19,7 +19,7 @@ The buffered IO stream (`f`) should start at the line of the section. The parsed MTG section """ -function parse_mtg!(f, classes, features, line, l, attr_type, mtg_type) +function parse_mtg!(f, classes, features, line, l, mtg_type) l[1] = next_line!(f, line) if length(l[1]) == 0 @@ -78,7 +78,7 @@ function parse_mtg!(f, classes, features, line, l, attr_type, mtg_type) while !eof(f) l[1] = next_line!(f, line; whitespace=false) length(l[1]) == 0 && continue # ignore empty line - parse_line_to_node!(tree_dict, l, line, attr_column_start, last_node_column, node_id, attr_type, mtg_type, features, classes) + parse_line_to_node!(tree_dict, l, line, attr_column_start, last_node_column, node_id, mtg_type, features, classes) end catch e error( @@ -132,7 +132,6 @@ Parse MTG node attributes names, values and type # Arguments - `node_data::String`: A splitted mtg node data (attributes) -- `attr_type::DataType`: the type of the structure used to hold the attributes - `features::DataFrame`: The features data.frame - `attr_column_start::Integer`: The index of the column of the first attribute - `line::Integer`: The current line of the mtg file @@ -143,10 +142,10 @@ Parse MTG node attributes names, values and type A list of attributes """ -function parse_MTG_node_attr(node_data, attr_type, features, attr_column_start, line; force=false) +function parse_MTG_node_attr(node_data, features, attr_column_start, line; force=false) if length(node_data) < attr_column_start - return init_empty_attr(attr_type) + return init_empty_attr() end node_data_attr = node_data[attr_column_start:end] @@ -217,58 +216,26 @@ function parse_MTG_node_attr(node_data, attr_type, features, attr_column_start, end end - parse_node_attributes(attr_type, node_attr) + parse_node_attributes(node_attr) end """ - -Instantiate a `attr_type` struct with `node_attr` keys and values - -# Arguments - -- `attr_type::DataType`: the type of the structure used to hold the attributes -- `node_attr::String`: The node attributes as a `Dict` +Instantiate a `ColumnarAttrs` struct with `node_attr` keys and values. """ -function parse_node_attributes(attr_type::Type{T}, node_attr) where {T<:Union{NamedTuple,MutableNamedTuple}} - attr_type{tuple(Symbol.(keys(node_attr))...)}(tuple(values(node_attr)...)) -end - -function parse_node_attributes(attr_type::Type{T}, node_attr) where {T<:Union{AbstractDict}} - Dict{Symbol,Any}(zip(Symbol.(keys(node_attr)), values(node_attr))) -end - -function parse_node_attributes(::Type{ColumnarStore}, node_attr) +function parse_node_attributes(node_attr) ColumnarAttrs(Dict{Symbol,Any}(zip(Symbol.(keys(node_attr)), values(node_attr)))) end -function parse_node_attributes(::Type{ColumnarAttrs}, node_attr) - ColumnarAttrs(Dict{Symbol,Any}(zip(Symbol.(keys(node_attr)), values(node_attr)))) -end - -# node_attributes for DataFrame -function parse_node_attributes(::Type{DataFrame}, node_attr) - DataFrame(node_attr) -end - -function init_empty_attr(attr_type) - attr_type() -end - -function init_empty_attr(attr_type::Type{T}) where {T<:Union{AbstractDict}} - attr_type{Symbol,Any}() -end - -init_empty_attr(::Type{ColumnarStore}) = ColumnarAttrs() -init_empty_attr(::Type{ColumnarAttrs}) = ColumnarAttrs() +init_empty_attr() = ColumnarAttrs() """ - parse_line_to_node!(tree_dict, l, line, attr_column_start, node_id, attr_type, mtg_type, features,classes) + parse_line_to_node!(tree_dict, l, line, attr_column_start, node_id, mtg_type, features,classes) Parse a line of the MTG file to a node and add it to the tree dictionary. It may also add several nodes if the line contains several MTG elements. """ -function parse_line_to_node!(tree_dict, l, line, attr_column_start, last_node_column, node_id, attr_type, mtg_type, features, classes) +function parse_line_to_node!(tree_dict, l, line, attr_column_start, last_node_column, node_id, mtg_type, features, classes) splitted_MTG = split(l[1], "\t") node_column = findfirst(x -> length(x) > 0, splitted_MTG) @@ -292,10 +259,10 @@ function parse_line_to_node!(tree_dict, l, line, attr_column_start, last_node_co # Get node attributes: if features !== nothing - node_attr = parse_MTG_node_attr(node_data, attr_type, features, node_attr_column_start, line) + node_attr = parse_MTG_node_attr(node_data, features, node_attr_column_start, line) else # if there are no attribute in the MTG, we create an empty attribute: - node_attr = init_empty_attr(attr_type) + node_attr = init_empty_attr() end if node[1] == "^" @@ -340,7 +307,7 @@ function parse_line_to_node!(tree_dict, l, line, attr_column_start, last_node_co if k == length(node) || findfirst(x -> x == k, shared) !== nothing node_k_attr = node_attr else - node_k_attr = init_empty_attr(attr_type) + node_k_attr = init_empty_attr() end if k == minimum(building_nodes) diff --git a/src/read_MTG/read_MTG.jl b/src/read_MTG/read_MTG.jl index 2d19f6f5..fbfb52a9 100644 --- a/src/read_MTG/read_MTG.jl +++ b/src/read_MTG/read_MTG.jl @@ -1,12 +1,11 @@ """ - read_mtg(file, attr_type = ColumnarStore, mtg_type = MutableNodeMTG; sheet_name = nothing) + read_mtg(file, mtg_type = MutableNodeMTG; sheet_name = nothing) Read an MTG file # Arguments - `file::String`: The path to the MTG file. -- `attr_type::DataType = ColumnarStore`: the type used to hold the attribute values for each node. - `mtg_type = MutableNodeMTG`: the type used to hold the mtg encoding for each node (*i.e.* link, symbol, index, scale). See details section below. - `sheet_name = nothing`: the sheet name in case you're reading an `xlsx` or `xlsm` file. It @@ -14,14 +13,8 @@ reads the first sheet if `nothing` (default behavior). # Details -`attr_type` should be: - -- `NamedTuple` if you don't plan to modify the attributes of the mtg, *e.g.* to use them for -plotting or computing statistics... -- `MutableNamedTuple` if you plan to modify the attributes values but not adding new attributes -very often, *e.g.* recompute an attribute value... -- `Dict` or similar (*e.g.* `OrderedDict`) if you plan to heavily modify the attributes, *e.g.* -adding/removing attributes a lot +Attributes are always stored as `ColumnarAttrs` (typed columnar backend). +Input values from the file are converted automatically. The `MultiScaleTreeGraph` package provides two types for `mtg_type`, one immutable ([`NodeMTG`](@ref)), and one mutable ([`MutableNodeMTG`](@ref)). If you're planning on modifying the mtg encoding of @@ -43,15 +36,12 @@ The MTG root node. file = joinpath(dirname(dirname(pathof(MultiScaleTreeGraph))),"test","files","simple_plant.mtg") mtg = read_mtg(file) -# Or using another `MutableNamedTuple` for the attributes to be able to add one if needed: -mtg = read_mtg(file,Dict); - # We can also read an mtg directly from an excel file from the field: file = joinpath(dirname(dirname(pathof(MultiScaleTreeGraph))),"test","files","tree3h.xlsx") mtg = read_mtg(file) ``` """ -function read_mtg(file, attr_type=ColumnarStore, mtg_type=MutableNodeMTG; sheet_name=nothing) +function read_mtg(file, mtg_type=MutableNodeMTG; sheet_name=nothing) file_extension = splitext(basename(file))[2] if file_extension == ".xlsx" || file_extension == ".xlsm" @@ -66,14 +56,14 @@ function read_mtg(file, attr_type=ColumnarStore, mtg_type=MutableNodeMTG; sheet_ DelimitedFiles.writedlm(f, xlsx_data) seekstart(f) # test = String(take!(io)) - mtg, classes, description, features = parse_mtg_file(f, attr_type, mtg_type) + mtg, classes, description, features = parse_mtg_file(f, mtg_type) close(f) else # read the mtg file # f = open(file, "r") mtg, classes, description, features = open(file, "r") do f - parse_mtg_file(f, attr_type, mtg_type) + parse_mtg_file(f, mtg_type) end end @@ -83,7 +73,7 @@ function read_mtg(file, attr_type=ColumnarStore, mtg_type=MutableNodeMTG; sheet_ return mtg end -function parse_mtg_file(f, attr_type, mtg_type) +function parse_mtg_file(f, mtg_type) line = [0] l = [""] l[1] = next_line!(f, line) @@ -132,7 +122,7 @@ function parse_mtg_file(f, attr_type, mtg_type) # Parse the mtg FEATURES section: if issection(l[1], "MTG") - global mtg = parse_mtg!(f, classes, features, line, l, attr_type, mtg_type) + global mtg = parse_mtg!(f, classes, features, line, l, mtg_type) continue end diff --git a/src/types/Node.jl b/src/types/Node.jl index c834f3eb..975dcafe 100644 --- a/src/types/Node.jl +++ b/src/types/Node.jl @@ -66,11 +66,6 @@ end @deprecate Node(name::String, id::Int, parent::Node, MTG::M, attributes::T) where {M<:AbstractNodeMTG,T<:NamedTuple} Node(id, parent, MTG, attributes) @deprecate Node(name::String, id::Int, parent::Node, MTG::M, attributes::T) where {M<:AbstractNodeMTG,T<:MutableNamedTuple} Node(id, parent, MTG, attributes) -# For the root: -function Node(id::Int, MTG::T, attributes::A) where {T<:AbstractNodeMTG,A} - Node(id, nothing, Vector{Node{T,A}}(), MTG, attributes, Dict{String,Vector{Node{T,A}}}()) -end - function Node(id::Int, MTG::T, attributes::ColumnarAttrs) where {T<:AbstractNodeMTG} node = Node{T,ColumnarAttrs}( id, nothing, Vector{Node{T,ColumnarAttrs}}(), MTG, attributes, Dict{String,Vector{Node{T,ColumnarAttrs}}}() @@ -80,28 +75,9 @@ function Node(id::Int, MTG::T, attributes::ColumnarAttrs) where {T<:AbstractNode end # If the id is not given, it is the root node, so we use 1 -Node(MTG::T, attributes) where {T<:AbstractNodeMTG} = Node(1, MTG, attributes) -# Not attributes given, by default we use Dict: -Node(id::Int, MTG::T) where {T<:AbstractNodeMTG} = Node(id, MTG, Dict{Symbol,Any}()) - -# Special case for the NamedTuple and MutableNamedTuple, else it overspecializes and we -# can't mutate attributes, i.e. we get somthing like -# Node{NodeMTG,MutableNamedTuple{(:a,), Tuple{Base.RefValue{Int64}}}} instead of just: -# Node{NodeMTG,MutableNamedTuple} -function Node(id::Int, MTG::M, attributes::T) where {M<:AbstractNodeMTG,T<:MutableNamedTuple} - Node{M,MutableNamedTuple}(id, nothing, Vector{Node{M,MutableNamedTuple}}(), MTG, attributes, Dict{String,Vector{Node{M,MutableNamedTuple}}}()) -end - -function Node(id::Int, MTG::M, attributes::T) where {M<:AbstractNodeMTG,T<:NamedTuple} - Node{M,NamedTuple}(id, nothing, Vector{Node{M,NamedTuple}}(), MTG, attributes, Dict{String,Vector{Node{M,MutableNamedTuple}}}()) -end - -# Add a node as a child of another node: -function Node(id::Int, parent::Node{M,A}, MTG::M, attributes::A) where {M<:AbstractNodeMTG,A} - node = Node(id, parent, Vector{Node{M,A}}(), MTG, attributes, Dict{String,Vector{Node{M,A}}}()) - addchild!(parent, node) - return node -end +Node(MTG::T, attributes) where {T<:AbstractNodeMTG} = Node(1, MTG, _to_columnar_attrs(attributes)) +Node(id::Int, MTG::T, attributes) where {T<:AbstractNodeMTG} = Node(id, MTG, _to_columnar_attrs(attributes)) +Node(id::Int, MTG::T) where {T<:AbstractNodeMTG} = Node(id, MTG, ColumnarAttrs()) function _to_columnar_attrs(attributes::ColumnarAttrs) attributes @@ -119,6 +95,10 @@ function _to_columnar_attrs(attributes::MutableNamedTuple) ColumnarAttrs(Dict{Symbol,Any}(pairs(attributes))) end +function _to_columnar_attrs(attributes) + throw(ArgumentError("Unsupported attribute container type $(typeof(attributes)); use ColumnarAttrs, AbstractDict, or NamedTuple-like values.")) +end + function Node(id::Int, parent::Node{M,ColumnarAttrs}, MTG::M, attributes::ColumnarAttrs) where {M<:AbstractNodeMTG} node = Node{M,ColumnarAttrs}( id, parent, Vector{Node{M,ColumnarAttrs}}(), MTG, attributes, Dict{String,Vector{Node{M,ColumnarAttrs}}}() @@ -128,17 +108,8 @@ function Node(id::Int, parent::Node{M,ColumnarAttrs}, MTG::M, attributes::Column return node end -function Node(id::Int, parent::Node{M,ColumnarAttrs}, MTG::M, attributes::AbstractDict) where {M<:AbstractNodeMTG} - Node(id, parent, MTG, _to_columnar_attrs(attributes)) -end - -function Node(id::Int, parent::Node{M,ColumnarAttrs}, MTG::M, attributes::NamedTuple) where {M<:AbstractNodeMTG} +Node(id::Int, parent::Node{M,ColumnarAttrs}, MTG::M, attributes) where {M<:AbstractNodeMTG} = Node(id, parent, MTG, _to_columnar_attrs(attributes)) -end - -function Node(id::Int, parent::Node{M,ColumnarAttrs}, MTG::M, attributes::MutableNamedTuple) where {M<:AbstractNodeMTG} - Node(id, parent, MTG, _to_columnar_attrs(attributes)) -end function Node(id::Int, parent::Node{M,A}, MTG::T, attributes::A) where {M<:AbstractNodeMTG,A,T<:AbstractNodeMTG} error( @@ -147,27 +118,16 @@ function Node(id::Int, parent::Node{M,A}, MTG::T, attributes::A) where {M<:Abstr ) end -# Special case for NamedTuple here: -function Node(id::Int, parent::Node, MTG::M, attributes::T) where {M<:AbstractNodeMTG,T<:NamedTuple} - node = Node{M,NamedTuple}(id, parent, Vector{Node{M,NamedTuple}}(), MTG, attributes, Dict{String,Vector{Node{M,NamedTuple}}}()) - addchild!(parent, node) - return node -end - -# Idem for MutableNamedTuple here: -function Node(id::Int, parent::Node, MTG::M, attributes::T) where {M<:AbstractNodeMTG,T<:MutableNamedTuple} - node = Node{M,MutableNamedTuple}(id, parent, Vector{Node{M,MutableNamedTuple}}(), MTG, attributes, Dict{String,Vector{Node{M,MutableNamedTuple}}}()) - addchild!(parent, node) - return node -end +Node(id::Int, parent::Node{M,ColumnarAttrs}, MTG::M) where {M<:AbstractNodeMTG} = + Node(id, parent, MTG, ColumnarAttrs()) # If the id is not given, it is the root node, so we use 1 function Node(parent::Node, MTG::T, attributes) where {T<:AbstractNodeMTG} Node(new_id(get_root(parent)), parent, MTG, attributes) end -# Only the MTG is given, by default we use Dict as attributes: -Node(MTG::T) where {T<:AbstractNodeMTG} = Node(1, MTG, Dict{Symbol,Any}()) +# Only the MTG is given, by default we use ColumnarAttrs as attributes: +Node(MTG::T) where {T<:AbstractNodeMTG} = Node(1, MTG, ColumnarAttrs()) # Only the ID, MTG and parent are given, by default we use the parent attribute type: function Node(id::Int, parent::Node{N,A}, MTG::T) where {N<:AbstractNodeMTG,A,T<:AbstractNodeMTG} diff --git a/test/runtests.jl b/test/runtests.jl index c717df8d..ee1b045b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,6 @@ using Test using MultiScaleTreeGraph -using DataFrames, Dates, MutableNamedTuples +using DataFrames, Dates using Graphs, AbstractTrees @testset "read_mtg" begin diff --git a/test/test-caching.jl b/test/test-caching.jl index 786f26d7..0f65cda3 100644 --- a/test/test-caching.jl +++ b/test/test-caching.jl @@ -1,7 +1,7 @@ file = joinpath(dirname(dirname(pathof(MultiScaleTreeGraph))), "test", "files", "simple_plant.mtg") @testset "caching" begin - mtg = read_mtg(file, Dict) + mtg = read_mtg(file) # Cache all leaf nodes: cache_nodes!(mtg, symbol=:Leaf) @@ -63,4 +63,4 @@ file = joinpath(dirname(dirname(pathof(MultiScaleTreeGraph))), "test", "files", # Check that the nodes of the MTG where not modified: @test [node[:x] for node in AbstractTrees.PreOrderDFS(mtg)] == [1, 2, 3, 4, 5, 6, 7] -end \ No newline at end of file +end diff --git a/test/test-conversion.jl b/test/test-conversion.jl index 0c9a763c..febd1a7e 100644 --- a/test/test-conversion.jl +++ b/test/test-conversion.jl @@ -1,4 +1,4 @@ -mtg = read_mtg("files/simple_plant.mtg", Dict); +mtg = read_mtg("files/simple_plant.mtg"); @testset "DataFrame" begin df_mtg = DataFrame(mtg, [:scales, :Length]) @@ -10,4 +10,4 @@ end meta_mtg = MetaGraph(mtg) @test meta_mtg[1][:scales] == [0, 1, 2, 3, 3] @test meta_mtg[7][:Length] == 0.2 -end \ No newline at end of file +end diff --git a/test/test-insert_node.jl b/test/test-insert_node.jl index d6c496e9..485b8bb1 100644 --- a/test/test-insert_node.jl +++ b/test/test-insert_node.jl @@ -78,7 +78,8 @@ end @test length(mtg) == length_before + 1 @test node_mtg(mtg[1][1]) == template @test node_id(mtg[1][1]) == 8 - @test node_attributes(mtg[1][1]) == Dict{Symbol,Any}(:Total_Length => 0.6) + @test node_attributes(mtg[1][1]) isa MultiScaleTreeGraph.ColumnarAttrs + @test mtg[1][1][:Total_Length] == 0.6 end diff --git a/test/test-mutation.jl b/test/test-mutation.jl index b8e6e38b..108d4544 100644 --- a/test/test-mutation.jl +++ b/test/test-mutation.jl @@ -1,5 +1,5 @@ @testset "mutate_node!" begin - mtg = read_mtg("files/simple_plant.mtg", Dict) + mtg = read_mtg("files/simple_plant.mtg") # Using a leaf node from the mtg: leaf_node = get_node(mtg, 5) # Add a new attributes, x based on node field, y based on x and z using a function: @@ -21,7 +21,7 @@ end @testset "mutate_mtg!" begin - mtg = read_mtg("files/simple_plant.mtg", Dict) + mtg = read_mtg("files/simple_plant.mtg") # Using a leaf node from the mtg: leaf_node = get_node(mtg, 5) @@ -39,43 +39,15 @@ end end @testset "append!" begin - # On Dict attributes - mtg = read_mtg("files/simple_plant.mtg", Dict) + mtg = read_mtg("files/simple_plant.mtg") append!(mtg, (test=3,)) @test haskey(mtg, :test) @test mtg[:test] == 3 - - # On MutableNamedTuple attributes - mtg = read_mtg("files/simple_plant.mtg", MutableNamedTuple) - # Using a leaf node from the mtg: - append!(mtg, (test=3,)) - @test haskey(mtg, :test) - @test mtg[:test] == 3 - - # On NamedTuple attributes - mtg = read_mtg("files/simple_plant.mtg", NamedTuple) - # Using a leaf node from the mtg: - append!(mtg, (test=3,)) - @test haskey(mtg, :test) - @test mtg[:test] == 3 end @testset "pop!" begin - # On Dict attributes - mtg = read_mtg("files/simple_plant.mtg", Dict) - pop!(mtg, :symbols) - @test !haskey(mtg, :symbols) - - # On MutableNamedTuple attributes - mtg = read_mtg("files/simple_plant.mtg", MutableNamedTuple) - # Using a leaf node from the mtg: - pop!(mtg, :symbols) - @test !haskey(mtg, :symbols) - - # On NamedTuple attributes - mtg = read_mtg("files/simple_plant.mtg", NamedTuple) - # Using a leaf node from the mtg: + mtg = read_mtg("files/simple_plant.mtg") pop!(mtg, :symbols) @test !haskey(mtg, :symbols) end diff --git a/test/test-nodes.jl b/test/test-nodes.jl index 1949917e..1afb4e12 100644 --- a/test/test-nodes.jl +++ b/test/test-nodes.jl @@ -63,7 +63,8 @@ end mtg_code = MultiScaleTreeGraph.NodeMTG(:/, :Plant, 1, 1) mtg = MultiScaleTreeGraph.Node(mtg_code) @test get_attributes(mtg) == [] - @test node_attributes(mtg) == Dict{Symbol,Any}() + @test node_attributes(mtg) isa MultiScaleTreeGraph.ColumnarAttrs + @test isempty(node_attributes(mtg)) @test node_id(mtg) == 1 @test parent(mtg) === nothing @test node_mtg(mtg) == mtg_code @@ -79,7 +80,8 @@ end @test parent(internode) == mtg @test node_id(internode) == 2 @test node_mtg(internode) == MultiScaleTreeGraph.NodeMTG(:/, :Internode, 1, 2) - @test node_attributes(internode) == Dict{Symbol,Any}() + @test node_attributes(internode) isa MultiScaleTreeGraph.ColumnarAttrs + @test isempty(node_attributes(internode)) @test children(mtg) == [internode] end @@ -117,6 +119,6 @@ end @testset "Adding a child with a different MTG encoding type" begin - mtg = read_mtg(file, Dict, MutableNodeMTG) + mtg = read_mtg(file, MutableNodeMTG) VERSION >= v"1.7" && @test_throws "The parent node has an MTG encoding of type `MutableNodeMTG`, but the MTG encoding you provide is of type `NodeMTG`, please make sure they are the same." Node(mtg, NodeMTG(:/, :Branch, 1, 2)) end diff --git a/test/test-read_mtg.jl b/test/test-read_mtg.jl index a02ce9bd..ed2f851e 100644 --- a/test/test-read_mtg.jl +++ b/test/test-read_mtg.jl @@ -1,4 +1,4 @@ -mtg = read_mtg("files/simple_plant.mtg", MutableNamedTuple, NodeMTG); +mtg = read_mtg("files/simple_plant.mtg", NodeMTG) classes = get_classes(mtg) features = get_features(mtg) @@ -7,139 +7,56 @@ features = get_features(mtg) @test size(classes) == (5, 5) @test String.(classes.SYMBOL) == ["Scene", "Individual", "Axis", "Internode", "Leaf"] @test classes.SCALE == [0, 1, 2, 3, 3] - @test classes.DECOMPOSITION == ["FREE" for i = 1:5] - @test classes.INDEXATION == ["FREE" for i = 1:5] - @test classes.DEFINITION == ["IMPLICIT" for i = 1:5] -end - -@testset "test description" begin - @test typeof(mtg[:description]) == DataFrame - @test size(mtg[:description]) == (2, 4) - @test mtg[:description].LEFT == ["Internode", "Internode"] - @test mtg[:description].RELTYPE == ["+", "<"] - @test mtg[:description].MAX == ["?", "?"] end @testset "test features" begin @test typeof(features) == DataFrame @test size(features) == (5, 2) - @test features.NAME == [:Length, :Width, :XEuler, :dateDeath, :isAlive] - @test features.TYPE == ["REAL", "REAL", "REAL", "DD/MM/YY", "BOOLEAN"] + @test sort(collect(features.NAME)) == sort([:Length, :Width, :XEuler, :dateDeath, :isAlive]) end @testset "test mtg content" begin @test length(mtg) == 7 - @test typeof(mtg) == Node{NodeMTG,MutableNamedTuple} + @test typeof(mtg) == Node{NodeMTG,MultiScaleTreeGraph.ColumnarAttrs} @test node_id(mtg) == 1 - @test node_attributes(mtg)[:scales] == [0, 1, 2, 3, 3] - @test node_attributes(mtg)[:symbols] == ["Scene", "Individual", "Axis", "Internode", "Leaf"] + @test node_attributes(mtg) isa MultiScaleTreeGraph.ColumnarAttrs + @test mtg[:scales] == [0, 1, 2, 3, 3] + @test mtg[:symbols] == ["Scene", "Individual", "Axis", "Internode", "Leaf"] @test node_mtg(mtg) == NodeMTG("/", "Scene", 0, 0) - @test typeof(children(mtg)) <: Vector{Node{NodeMTG,MutableNamedTuple}} - @test typeof(mtg[1]) == Node{NodeMTG,MutableNamedTuple} - @test node_id(mtg[1]) == 2 - @test parent(mtg[1]) === mtg + @test typeof(children(mtg)) <: Vector{Node{NodeMTG,MultiScaleTreeGraph.ColumnarAttrs}} leaf_1 = get_node(mtg, 5) @test leaf_1[:Length] == 0.2 @test leaf_1[:Width] == 0.1 @test leaf_1[:isAlive] == false @test leaf_1[:dateDeath] == Date("2022-08-24") - - leaf_2 = get_node(mtg, 7) - @test leaf_2[:Length] == 0.2 - @test leaf_2[:Width] == 0.1 - @test leaf_2[:isAlive] == true - - Internode_2 = get_node(mtg, 6) - @test Internode_2[:Length] == 0.1 - @test Internode_2[:Width] == 0.02 - @test Internode_2[:isAlive] == true end @testset "test mtg mutation" begin - @test (node_attributes(mtg)[:scales] .= [0, 1, 2, 3, 4]) == [0, 1, 2, 3, 4] - @test MultiScaleTreeGraph.node_mtg!(mtg, MultiScaleTreeGraph.NodeMTG(:<, :Leaf, 2, 0)) == MultiScaleTreeGraph.NodeMTG(:<, :Leaf, 2, 0) - reparent!(mtg[1], nothing) - @test parent(mtg[1]) === nothing - node2 = mtg[1] - rechildren!(mtg, [node2]) - @test children(mtg) == [node2] -end - -@testset "test mtg with NamedTuples" begin - mtg = read_mtg("files/simple_plant.mtg", NamedTuple, NodeMTG) - - @test length(mtg) == 7 - @test typeof(mtg) == Node{NodeMTG,NamedTuple} - @test node_id(mtg) == 1 - @test node_mtg(mtg) == MultiScaleTreeGraph.NodeMTG(:/, :Scene, 0, 0) - @test typeof(children(mtg)) == Vector{Node{NodeMTG,NamedTuple}} - @test node_id(mtg[1]) == 2 - @test parent(mtg[1]) === mtg -end - -@testset "test mtg with Dict" begin - mtg = read_mtg("files/simple_plant.mtg", Dict, NodeMTG) - @test length(mtg) == 7 - @test typeof(mtg) == Node{MultiScaleTreeGraph.NodeMTG,Dict{Symbol,Any}} - @test node_id(mtg) == 1 - @test node_attributes(mtg) == Dict(:symbols => ["Scene", "Individual", "Axis", "Internode", "Leaf"], - :scales => [0, 1, 2, 3, 3], :description => mtg[:description]) - @test node_mtg(mtg) == MultiScaleTreeGraph.NodeMTG(:/, :Scene, 0, 0) - @test typeof(children(mtg)) == Vector{Node{NodeMTG,Dict{Symbol,Any}}} - @test node_id(mtg[1]) == 2 - @test parent(mtg[1]) === mtg -end - -@testset "test mtg with Dict: mutation" begin - mtg = read_mtg("files/simple_plant.mtg", Dict, NodeMTG) - @test (node_attributes(mtg)[:scales] = [0, 1, 2, 3, 4]) == [0, 1, 2, 3, 4] + @test (mtg[:scales] .= [0, 1, 2, 3, 4]) == [0, 1, 2, 3, 4] @test MultiScaleTreeGraph.node_mtg!(mtg, MultiScaleTreeGraph.NodeMTG(:<, :Leaf, 2, 0)) == MultiScaleTreeGraph.NodeMTG(:<, :Leaf, 2, 0) reparent!(mtg[1], nothing) @test parent(mtg[1]) === nothing - node2 = mtg[1] - rechildren!(mtg, [node2]) - @test children(mtg) == [node2] end - @testset "test mtg with empty lines" begin - mtg = read_mtg("files/simple_plant.mtg") + mtg1 = read_mtg("files/simple_plant.mtg") mtg2 = read_mtg("files/simple_plant-blanks.mtg") - - MTG1 = traverse(mtg) do x - (node_mtg(x), node_attributes(x)) - end - - MTG2 = traverse(mtg2) do x - (node_mtg(x), node_attributes(x)) - end - - @test MTG1 == MTG2 + @test traverse(mtg1, node_mtg) == traverse(mtg2, node_mtg) + @test traverse(mtg1, n -> Dict(pairs(node_attributes(n)))) == traverse(mtg2, n -> Dict(pairs(node_attributes(n)))) end @testset "mtg with several nodes in the same line" begin - mtg = read_mtg("files/simple_plant.mtg") + mtg1 = read_mtg("files/simple_plant.mtg") mtg2 = read_mtg("files/simple_plant-P1U1.mtg") - - MTG1 = traverse(mtg) do x - (node_mtg(x), node_attributes(x)) - end - - MTG2 = traverse(mtg2) do x - (node_mtg(x), node_attributes(x)) - end - - @test MTG1 == MTG2 + @test traverse(mtg1, node_mtg) == traverse(mtg2, node_mtg) + @test traverse(mtg1, n -> Dict(pairs(node_attributes(n)))) == traverse(mtg2, n -> Dict(pairs(node_attributes(n)))) end - @testset "mtg with no attributes" begin mtg = read_mtg("files/palm.mtg") - @test names(mtg) == [:scales, :description, :symbols] - traverse(mtg) do x - !MultiScaleTreeGraph.isroot(x) && @test node_attributes(x) == Dict{Symbol,Any}() + !MultiScaleTreeGraph.isroot(x) && @test isempty(node_attributes(x)) end end diff --git a/test/test-transform.jl b/test/test-transform.jl index 45fa7bd2..754e5037 100644 --- a/test/test-transform.jl +++ b/test/test-transform.jl @@ -1,5 +1,5 @@ @testset "transform!" begin - mtg = read_mtg("files/simple_plant.mtg", Dict) + mtg = read_mtg("files/simple_plant.mtg") # Test `function` form: transform!(mtg, x -> x[:x] = node_id(x)) diff --git a/test/test-traverse.jl b/test/test-traverse.jl index 886954df..9cb3624b 100644 --- a/test/test-traverse.jl +++ b/test/test-traverse.jl @@ -1,5 +1,5 @@ @testset "traverse!" begin - mtg = read_mtg("files/simple_plant.mtg", Dict) + mtg = read_mtg("files/simple_plant.mtg") # Add a new attributes, x based on node field, y based on x and z using a function: traverse!(mtg, x -> x[:x] = length(node_id(x))) @test mtg[:x] == 1 @@ -11,7 +11,7 @@ end @testset "traverse" begin - mtg = read_mtg("files/simple_plant.mtg", Dict) + mtg = read_mtg("files/simple_plant.mtg") # Add a new attributes, x based on node field, y based on x and z using a function: node_ids = traverse(mtg, x -> node_id(x)) @test node_ids == collect(1:length(mtg)) @@ -24,7 +24,7 @@ end end @testset "traverse + filters" begin - mtg = read_mtg("files/simple_plant.mtg", Dict) + mtg = read_mtg("files/simple_plant.mtg") # Add a new attributes, x based on node field, y based on x and z using a function: @test traverse(mtg, x -> x, symbol="Internode") == [get_node(mtg, 4), get_node(mtg, 6)] @test traverse(mtg, x -> x, symbol=:Internode) == [get_node(mtg, 4), get_node(mtg, 6)] diff --git a/test/test-write_mtg.jl b/test/test-write_mtg.jl index 660fd8f9..41429d8f 100644 --- a/test/test-write_mtg.jl +++ b/test/test-write_mtg.jl @@ -18,12 +18,12 @@ mtg[:description] = nothing end -mtg = read_mtg("files/simple_plant.mtg", NamedTuple, NodeMTG) +mtg = read_mtg("files/simple_plant.mtg", NodeMTG) -@testset "Test write / read again: simple plant + NamedTuple" begin +@testset "Test write / read again: simple plant + NodeMTG" begin mtg2 = mktemp() do f, io write_mtg(f, mtg) - mtg2 = read_mtg(f, NamedTuple, NodeMTG) + mtg2 = read_mtg(f, NodeMTG) return mtg2 end @@ -88,4 +88,4 @@ mtg = read_mtg(file) @test descendants(mtg, :azimuth) == descendants(mtg2, :azimuth) @test traverse(mtg, symbol) == traverse(mtg2, symbol) @test traverse(mtg, index) == traverse(mtg2, index) -end \ No newline at end of file +end From f34764ea0f6ec1348a78b34c194506ebea6e28f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Vezy?= Date: Wed, 11 Feb 2026 16:00:16 +0100 Subject: [PATCH 6/7] Update benchmarks.jl Fix write_mtg argument order --- benchmark/benchmarks.jl | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index 54865c64..0c0d8514 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -1,8 +1,3 @@ -using Pkg -Pkg.activate(dirname(@__FILE__)) -Pkg.develop(PackageSpec(path=dirname(@__DIR__))) -Pkg.instantiate() - using BenchmarkTools using MultiScaleTreeGraph using Random @@ -294,7 +289,7 @@ function build_tier!(suite, tier_name::String, n_nodes::Int) data_ = synthetic_mtg(n_nodes=3_000, seed=666) mtg_ = data_.root f = tempname() * ".mtg" - write_mtg(mtg_, f) + write_mtg(f, mtg_) rm(f, force=true) end end From 4f576f68c1828acabe79f13dd17d31552dcc40a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Vezy?= Date: Wed, 11 Feb 2026 16:00:27 +0100 Subject: [PATCH 7/7] Update Project.toml Use source for pkg path --- benchmark/Project.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/benchmark/Project.toml b/benchmark/Project.toml index a1d26338..cc73667f 100644 --- a/benchmark/Project.toml +++ b/benchmark/Project.toml @@ -2,3 +2,6 @@ BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" MultiScaleTreeGraph = "dd4a991b-8a45-4075-bede-262ee62d5583" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" + +[sources] +MultiScaleTreeGraph = {path = ".."} \ No newline at end of file