diff --git a/AGENT.md b/AGENT.md index c99dd9c..6cc0b2c 100644 --- a/AGENT.md +++ b/AGENT.md @@ -21,6 +21,7 @@ - `src/compute_MTG/indexing.jl` - `src/compute_MTG/check_filters.jl` - `src/types/Node.jl` +- `src/types/Attributes.jl` - `src/compute_MTG/node_funs.jl` ## Practical Optimization Rules @@ -30,4 +31,14 @@ - `descendants!(buffer, node, key; ...)` - Keep filter checks branch-light when no filters are provided. - Keep key access on typed attribute containers (`NamedTuple`, `MutableNamedTuple`, typed dicts) in specialized methods when possible. +- Prefer explicit attribute APIs: + - `attribute(node, key)` + - `attribute!(node, key, value)` + - `attributes(node; format=:namedtuple|:dict)` + - `add_column!` / `drop_column!` / `rename_column!` - Preserve API behavior and add tests for every optimization that changes internals. + +## Benchmark Tiers +- `small` (~10k nodes): full matrix including API-surface benchmarks +- `medium` (~100k nodes): hot-path matrix +- `large` (~300k nodes): critical hot paths only diff --git a/Project.toml b/Project.toml index 1cade87..f11ef08 100644 --- a/Project.toml +++ b/Project.toml @@ -14,6 +14,7 @@ MutableNamedTuples = "af6c499f-54b4-48cc-bbd2-094bba7533c7" OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce" +Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" XLSX = "fdbf4ff8-1666-58a4-91e7-1b58723a45e0" [compat] @@ -25,6 +26,7 @@ MetaGraphsNext = "0.5, 0.6, 0.7" MutableNamedTuples = "0.1" OrderedCollections = "1.4" SHA = "0.7, 1" +Tables = "1" XLSX = "0.7, 0.8, 0.9, 0.10" julia = "1.6" diff --git a/benchmark/Project.toml b/benchmark/Project.toml index 9778dd9..cc73667 100644 --- a/benchmark/Project.toml +++ b/benchmark/Project.toml @@ -1,3 +1,7 @@ [deps] BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" MultiScaleTreeGraph = "dd4a991b-8a45-4075-bede-262ee62d5583" +Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" + +[sources] +MultiScaleTreeGraph = {path = ".."} \ No newline at end of file diff --git a/benchmark/README.md b/benchmark/README.md index 10228f1..5d81609 100644 --- a/benchmark/README.md +++ b/benchmark/README.md @@ -10,6 +10,21 @@ julia --project=benchmark benchmark/benchmarks.jl Workloads currently covered: -- full-tree traversal -- traversal + data extraction from descendants -- repeated many-small queries (`children`, `parent`, `ancestors`) +- tiered datasets: `small` (~10k nodes), `medium` (~100k), `large` (~300k) +- full-tree traversal and traversal updates + - one attribute update on all nodes + - multi-attribute update on one symbol (`:Leaf`) + - multi-attribute update on mixed symbols (`[:Leaf, :Internode]`) +- descendants queries + - one/many attributes, one symbol + - one/many attributes, mixed symbols + - `ignore_nothing=true/false` + - in-place and allocating variants +- repeated many-small queries + - `children`, `parent`, `ancestors`, `ancestors!` + - repeated descendants retrieval with and without in-place buffers +- API surface (small tier) + - insertion/deletion/pruning + - `transform!`, `select!` + - `symbol_table` / `mtg_table` + - `write_mtg` diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index f35bb4b..0c0d851 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -1,57 +1,101 @@ -using Pkg -Pkg.activate(dirname(@__FILE__)) -Pkg.develop(PackageSpec(path=dirname(@__DIR__))) -Pkg.instantiate() - using BenchmarkTools using MultiScaleTreeGraph using Random +using Tables const SUITE = BenchmarkGroup() +const HAS_EXPLICIT_ATTRIBUTE_API = isdefined(MultiScaleTreeGraph, :attribute) && isdefined(MultiScaleTreeGraph, :attribute!) +const HAS_TABLE_VIEWS_API = isdefined(MultiScaleTreeGraph, :symbol_table) && isdefined(MultiScaleTreeGraph, :mtg_table) + +const SIZE_TIERS = ( + small=10_000, + medium=100_000, + large=300_000, +) -function synthetic_tree(; branching::Int=4, depth::Int=8, nsamples::Int=256, seed::Int=42) +function synthetic_mtg(; n_nodes::Int=10_000, seed::Int=42) rng = MersenneTwister(seed) root = Node( 1, - NodeMTG("/", "Scale1", 1, 1), + MutableNodeMTG(:/, :Plant, 1, 1), Dict{Symbol,Any}( :mass => rand(rng), - :area => rand(rng), :height => rand(rng), + :temperature => 20.0, ), ) - frontier = [root] + candidates = Node[root] + leaves = Node[] + internodes = Node[] + all_nodes = Node[root] + next_id = 1 + while next_id < n_nodes + parent_ = candidates[rand(rng, eachindex(candidates))] + next_id += 1 - for d in 1:depth - next_frontier = Node{NodeMTG,Dict{Symbol,Any}}[] - sym = "Scale$(d + 1)" - sc = d + 1 - for p in frontier - for b in 1:branching - next_id += 1 - child = Node( - next_id, - p, - NodeMTG("<", sym, b, sc), - Dict{Symbol,Any}( - :mass => rand(rng), - :area => rand(rng), - :height => rand(rng), - ), - ) - push!(next_frontier, child) - end + roll = rand(rng) + if roll < 0.60 + sym = :Internode + scale_ = 2 + link_ = :< + attrs = Dict{Symbol,Any}( + :mass => rand(rng), + :Length => rand(rng), + :Diameter => rand(rng), + ) + elseif roll < 0.95 + sym = :Leaf + scale_ = 3 + link_ = :+ + attrs = Dict{Symbol,Any}( + :mass => rand(rng), + :Width => rand(rng), + :Area => rand(rng), + ) + else + sym = :Axis + scale_ = 2 + link_ = :< + attrs = Dict{Symbol,Any}( + :mass => rand(rng), + :Length => rand(rng), + ) + end + + child = Node(next_id, parent_, MutableNodeMTG(link_, sym, 1, scale_), attrs) + push!(all_nodes, child) + + if sym == :Leaf + push!(leaves, child) + else + push!(internodes, child) + push!(candidates, child) end - frontier = next_frontier end - all_nodes = traverse(root, x -> x, type=typeof(root)) - sample_nodes = rand(rng, all_nodes, min(nsamples, length(all_nodes))) + sample_size = min(512, length(all_nodes)) + sample_nodes = rand(rng, all_nodes, sample_size) + sample_leaves = isempty(leaves) ? sample_nodes : rand(rng, leaves, min(512, length(leaves))) - return root, frontier, sample_nodes + return (root=root, leaves=leaves, internodes=internodes, all_nodes=all_nodes, sample_nodes=sample_nodes, sample_leaves=sample_leaves) +end + +@inline function _attr_get(node, key::Symbol; default=nothing) + if HAS_EXPLICIT_ATTRIBUTE_API + return attribute(node, key, default=default) + end + return get(node_attributes(node), key, default) +end + +@inline function _attr_set!(node, key::Symbol, value) + if HAS_EXPLICIT_ATTRIBUTE_API + return attribute!(node, key, value) + end + node[key] = value + return value end function children_workload(nodes, reps::Int) @@ -79,9 +123,9 @@ function ancestors_workload(nodes, reps::Int) s = 0.0 @inbounds for _ in 1:reps for n in nodes - vals = ancestors(n, :mass, recursivity_level=4, type=Float64) + vals = ancestors(n, :Length, recursivity_level=4) for v in vals - s += v + v === nothing || (s += v) end end end @@ -90,34 +134,165 @@ end function ancestors_workload_inplace(nodes, reps::Int) s = 0.0 - buf = Float64[] + buf = Union{Nothing,Float64}[] @inbounds for _ in 1:reps for n in nodes - ancestors!(buf, n, :mass, recursivity_level=4) + ancestors!(buf, n, :Length, recursivity_level=4) for v in buf - s += v + v === nothing || (s += v) end end end return s end -function descendants_extraction_workload(root) - vals = descendants(root, :mass, type=Float64) +function descendants_repeated_workload(root, reps::Int) s = 0.0 - @inbounds for v in vals - s += v + @inbounds for _ in 1:reps + vals = descendants(root, :Length, symbol=:Internode, ignore_nothing=true) + for v in vals + s += v + end end return s end -function descendants_extraction_workload_inplace_1(root) - descendants!(root, :mass, type=Float64) +function descendants_repeated_workload_inplace(root, reps::Int) + s = 0.0 + buf = Float64[] + @inbounds for _ in 1:reps + descendants!(buf, root, :Length, symbol=:Internode, ignore_nothing=true) + for v in buf + s += v + end + end + return s +end + +function traverse_update_one!(root) + traverse!(root) do node + m = _attr_get(node, :mass, default=0.0) + _attr_set!(node, :mass, m + 0.1) + end + return nothing end -function descendants_extraction_workload_inplace_2(root) - vals = Float64[] - descendants!(vals, root, :mass) +function traverse_update_multi_leaf!(root) + traverse!(root, symbol=:Leaf) do node + width = _attr_get(node, :Width, default=0.0) + area = _attr_get(node, :Area, default=0.0) + _attr_set!(node, :Width, width * 1.001) + _attr_set!(node, :Area, area + width) + end + return nothing +end + +function traverse_update_multi_mixed!(root) + traverse!(root, symbol=(:Leaf, :Internode)) do node + m = _attr_get(node, :mass, default=0.0) + _attr_set!(node, :mass, m * 0.999 + 0.0001) + _attr_set!(node, :update_counter, _attr_get(node, :update_counter, default=0) + 1) + end + return nothing +end + +function descendants_tuple_workload(root) + vals = descendants(root, [:Length, :mass], symbol=:Internode, ignore_nothing=true) + s = 0.0 + for v in vals + s += v[1] + v[2] + end + return s +end + +function descendants_mixed_one(root; ignore_nothing::Bool) + descendants(root, :Length, symbol=(:Leaf, :Internode), ignore_nothing=ignore_nothing) +end + +function descendants_mixed_many(root; ignore_nothing::Bool) + descendants(root, [:Length, :Area], symbol=(:Leaf, :Internode), ignore_nothing=ignore_nothing) +end + +function build_tier!(suite, tier_name::String, n_nodes::Int) + seed = Int(mod(hash(tier_name), typemax(Int))) + data = synthetic_mtg(n_nodes=n_nodes, seed=seed) + root = data.root + sample_nodes = data.sample_nodes + sample_leaves = data.sample_leaves + + tier = BenchmarkGroup() + suite[tier_name] = tier + + tier["traverse"]["full_tree_nodes"] = @benchmarkable traverse!($root, _ -> nothing) + + tier["traverse_update"]["one_attr_all_nodes"] = @benchmarkable traverse_update_one!($root) + tier["traverse_update"]["multi_attr_leaf_only"] = @benchmarkable traverse_update_multi_leaf!($root) + tier["traverse_update"]["multi_attr_leaf_internode"] = @benchmarkable traverse_update_multi_mixed!($root) + + tier["descendants_query"]["one_attr_one_symbol"] = @benchmarkable descendants($root, :Length, symbol=:Internode, ignore_nothing=true) + tier["descendants_query"]["many_attr_one_symbol"] = @benchmarkable descendants_tuple_workload($root) + tier["descendants_query"]["one_attr_mixed_keep_nothing"] = @benchmarkable descendants_mixed_one($root, ignore_nothing=false) + tier["descendants_query"]["one_attr_mixed_ignore_nothing"] = @benchmarkable descendants_mixed_one($root, ignore_nothing=true) + tier["descendants_query"]["many_attr_mixed_keep_nothing"] = @benchmarkable descendants_mixed_many($root, ignore_nothing=false) + tier["descendants_query"]["many_attr_mixed_ignore_nothing"] = @benchmarkable descendants_mixed_many($root, ignore_nothing=true) + + tier["descendants_query"]["one_attr_one_symbol_inplace"] = @benchmarkable begin + buf = Float64[] + descendants!(buf, $root, :Length, symbol=:Internode, ignore_nothing=true) + end + + tier["many_queries"]["children_repeated"] = @benchmarkable children_workload($sample_nodes, 300) + tier["many_queries"]["parent_repeated"] = @benchmarkable parent_workload($sample_nodes, 300) + tier["many_queries"]["ancestors_repeated"] = @benchmarkable ancestors_workload($sample_leaves, 40) + tier["many_queries"]["ancestors_repeated_inplace"] = @benchmarkable ancestors_workload_inplace($sample_leaves, 40) + tier["many_queries"]["descendants_repeated"] = @benchmarkable descendants_repeated_workload($root, 30) + tier["many_queries"]["descendants_repeated_inplace"] = @benchmarkable descendants_repeated_workload_inplace($root, 30) + + if tier_name == "small" + tier["api_surface_small_only"]["insert_child"] = @benchmarkable begin + data_ = synthetic_mtg(n_nodes=8_000, seed=111) + mtg_ = data_.root + insert_child!(mtg_[1], MutableNodeMTG(:<, :Internode, 1, 2), x -> Dict{Symbol,Any}(:mass => 0.1), [max_id(mtg_)]) + end + + tier["api_surface_small_only"]["delete_node"] = @benchmarkable begin + data_ = synthetic_mtg(n_nodes=8_000, seed=222) + mtg_ = data_.root + target = get_node(mtg_, node_id(mtg_[1])) + delete_node!(target) + end + + tier["api_surface_small_only"]["prune_subtree"] = @benchmarkable begin + data_ = synthetic_mtg(n_nodes=8_000, seed=333) + mtg_ = data_.root + prune!(mtg_[1]) + end + + tier["api_surface_small_only"]["transform"] = @benchmarkable begin + data_ = synthetic_mtg(n_nodes=8_000, seed=444) + mtg_ = data_.root + transform!(mtg_, :mass => (x -> x + 1.0) => :mass2, ignore_nothing=true) + end + + tier["api_surface_small_only"]["select"] = @benchmarkable begin + data_ = synthetic_mtg(n_nodes=8_000, seed=555) + mtg_ = data_.root + select!(mtg_, :mass, :Length, ignore_nothing=true) + end + + if HAS_TABLE_VIEWS_API + tier["api_surface_small_only"]["tables_symbol"] = @benchmarkable symbol_table($root, :Leaf) + tier["api_surface_small_only"]["tables_unified"] = @benchmarkable mtg_table($root) + end + + tier["api_surface_small_only"]["write_mtg"] = @benchmarkable begin + data_ = synthetic_mtg(n_nodes=3_000, seed=666) + mtg_ = data_.root + f = tempname() * ".mtg" + write_mtg(f, mtg_) + rm(f, force=true) + end + end end suite_name = "mstg" @@ -129,29 +304,14 @@ elseif Sys.islinux() suite_name *= "_linux" end -SUITE[suite_name] = BenchmarkGroup([ - "traverse", - "traverse_extract", - "many_queries", -]) - -root, leaves, sample_nodes = synthetic_tree() -SUITE[suite_name]["traverse"]["full_tree_nodes"] = @benchmarkable traverse!($root, _ -> nothing) -SUITE[suite_name]["traverse_extract"]["descendants_mass"] = @benchmarkable descendants_extraction_workload($root) -SUITE[suite_name]["traverse_extract"]["descendants_mass_inplace_1"] = @benchmarkable descendants_extraction_workload_inplace_1($root) +SUITE[suite_name] = BenchmarkGroup() -# Add this one only if we have a method for `descendants!(val, node, key, type)` -if hasmethod(descendants!, Tuple{AbstractVector,Node,Symbol}) - SUITE[suite_name]["traverse_extract"]["descendants_mass_inplace_2"] = @benchmarkable descendants_extraction_workload_inplace_2($root) -end +build_tier!(SUITE[suite_name], "small", SIZE_TIERS.small) +build_tier!(SUITE[suite_name], "medium", SIZE_TIERS.medium) +build_tier!(SUITE[suite_name], "large", SIZE_TIERS.large) -SUITE[suite_name]["many_queries"]["children_repeated"] = @benchmarkable children_workload($sample_nodes, 300) -SUITE[suite_name]["many_queries"]["parent_repeated"] = @benchmarkable parent_workload($sample_nodes, 300) -SUITE[suite_name]["many_queries"]["ancestors_repeated"] = @benchmarkable ancestors_workload($leaves, 40) -# Test if ancestors! exists in the package first: -if isdefined(MultiScaleTreeGraph, :ancestors!) - SUITE[suite_name]["many_queries"]["ancestors_repeated_inplace"] = @benchmarkable ancestors_workload_inplace($leaves, 40) -end +# Keep the largest tier focused on critical hot paths. +delete!(SUITE[suite_name]["large"], "api_surface_small_only") if abspath(PROGRAM_FILE) == @__FILE__ results = run(SUITE, verbose=true) diff --git a/docs/src/api.md b/docs/src/api.md index c667978..71a0326 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -2,6 +2,13 @@ Here is a list of all exported functions from MultiScaleTreeGraph.jl. For more details, click on the link and you'll be directed to the function help. +Notable attribute APIs: + +- `attribute`, `attribute!`, `attributes`, `attribute_names` +- `add_column!`, `drop_column!`, `rename_column!` +- `descendants_strategy`, `descendants_strategy!` (choose automatic/direct/indexed descendants retrieval) +- `symbol_table`, `mtg_table` (`Tables.jl` sources) + ```@index ``` diff --git a/docs/src/index.md b/docs/src/index.md index 46f7de1..066844e 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -17,7 +17,7 @@ Documentation for [MultiScaleTreeGraph.jl](https://github.com/VEZY/MultiScaleTre The goal of MultiScaleTreeGraph.jl is to read, write, analyse and plot MTG (Multi-scale Tree Graph) files. -The Multi-Scale Tree Graph, or MTG, is a data structure used to encode a plant to describe its topology (*i.e.* structure) and any attributes (*e.g.* geometry, colours, state...). It was developed in the [AMAP lab](https://amap.cirad.fr/) in the 90's to cope with the need of a generic yet scalable structure for plant topology and traits measurement, analysis and modelling. +The Multi-Scale Tree Graph (MTG) is a format used to describe plant structure (topology) and associated attributes (*e.g.* geometry, colours, state). It was developed in the [AMAP lab](https://amap.cirad.fr/) in the 90's to provide a generic and scalable way to represent plant topology and measurements. The format is described in details in the original paper from Godin et Caraglio (1998). diff --git a/docs/src/the_mtg/mtg_concept.md b/docs/src/the_mtg/mtg_concept.md index b4f1da6..4f6765e 100644 --- a/docs/src/the_mtg/mtg_concept.md +++ b/docs/src/the_mtg/mtg_concept.md @@ -2,7 +2,7 @@ ## Introduction -The Multi-scale Tree Graph -or MTG for short- is a data structure for describing a tree-alike object at one or several scales. +The Multi-scale Tree Graph (MTG) is a format for describing a plant-like branching object at one or several scales. Let's take a grass plant as an example: @@ -20,7 +20,7 @@ If we get closer again, each axis can be described with more details, and we can *Fig. 2. A grass plant described at three different scales: (a) the plant, (b) the axis and (c) the phytomer. The corresponding MTG graph (mono-scale) is shown on the right of each description.* -Figure 2 shows a different graph for each scale used for the description, but we can join all scales into a single MTG instead. In the MTG, all scales live together in the same data structure. The elementary object is called a node. The nodes are denoted by the little circles in Figure 2. There is one node for the plant scale (Fig. 2a), two nodes for the axis because there are two tillers (Fig. 2b), and six nodes for the phytomers, one for each (Fig. 2c). +Figure 2 shows a different graph for each scale, but we can also merge all scales into one MTG. In an MTG, all scales live together in one structure. The basic element is a **node** (the circles in Figure 2). There is one node for the plant scale (Fig. 2a), two nodes for axes because there are two tillers (Fig. 2b), and six nodes for phytomers, one for each (Fig. 2c). The resulting MTG describing all scales at once in the same graph can be represented as follows: diff --git a/docs/src/the_mtg/our_implementation.md b/docs/src/the_mtg/our_implementation.md index 52b2e2e..e411193 100644 --- a/docs/src/the_mtg/our_implementation.md +++ b/docs/src/the_mtg/our_implementation.md @@ -9,9 +9,12 @@ mtg = read_mtg(file) node_6 = get_node(mtg, 6) ``` -In this package, the MTG is represented as a [tree data structure](https://en.wikipedia.org/wiki/Tree_%28data_structure%29). +In this package, the MTG is represented as a tree (nodes linked by parent/children relationships). -The tree is built from a series of nodes with different fields that describe the topology (*i.e.* how nodes are connected together) and the attributes of the node. +The tree is built from nodes. Each node stores: + +- how it is connected to other nodes (its topology) +- its own measured or computed attributes !!! note The package use terms from computer science rather than plant biology. So we use words such as "root" in an MTG, which is not the plant root, but the first node in the tree, *i.e.* the one without any parent. Similarly a leaf node is not a leaf from a plant but a node without any children. @@ -24,37 +27,39 @@ The nodes have their own data type called [`Node`](@ref). A [`Node`](@ref) has s fieldnames(Node) ``` -Here is a little description of each field: +Here is a simple description of each field: - `id`: The unique integer identifier of the node. It can be set by the user but is usually set automatically. - `parent`: The parent node of the curent node. If the curent node is the root node, it will return `nothing`. You can test whether a node is a root node sing the [`isroot`](@ref) function. -- children: a dictionary of the children nodes with their `id` as key, or `nothing` if none; +- `children`: the child nodes. - `MTG`: The MTG encoding of the node (see below, or [`NodeMTG`](@ref)) -- `attributes`: the node attributes. Usually a `NamedTuple`, a `MutableNamedTuple` or a `Dict` or similar (e.g. `OrderedDict`), but the type is optional. The choice of the data structure depends mainly on how much you plan to change the attributes and their values. Attributes include for example the length or diameter of a node, its colour, 3d position... -- `traversal_cache`: a cache for the traversal, used by *e.g.* [`traverse`](@ref) to traverse more efficiently particular nodes in the MTG +- `attributes`: node values (for example length, diameter, color, 3D position). +- `traversal_cache`: saved traversal results used to speed up repeated operations. -The value of tee fields are accessed using accessor functions: [`node_id`](@ref), [`parent`](@ref), [`children`](@ref), [`node_mtg`](@ref), [`node_attributes`](@ref), and the last one `get_traversal_cache` which is not exported because users shouldn't use it directly. +The values of these fields are accessed with helper functions such as [`node_id`](@ref), [`parent`](@ref), [`children`](@ref), [`node_mtg`](@ref), and [`node_attributes`](@ref). -The MTG field of a node describes the topology encoding of the node: its type of link with its parent (decompose: `/`, follow: `<`, and branch: `+`), its symbol, index, and scale (see [Node MTG and attributes](@ref) and [The MTG section](@ref) for more details). The MTG field must be encoded in a data structure called [`NodeMTG`](@ref) or in a [`MutableNodeMTG`](@ref). They have four fields corresponding to the topology encoding: +The MTG field of a node describes how the node is positioned in the graph: link with parent (`/`, `<`, `+`), symbol, index, and scale (see [Node MTG and attributes](@ref) and [The MTG section](@ref) for more details). It is stored as [`NodeMTG`](@ref) or [`MutableNodeMTG`](@ref). These types have four fields: ```@example usepkg fieldnames(NodeMTG) ``` -Creating a [`NodeMTG`](@ref) is very simple, just pass the arguments by position. For example if we have an Axis that decomposes its parent node ("/"), with an index 0 and a scale of 1, we would declare it as follows: +Creating a [`NodeMTG`](@ref) is simple: pass the four values in order. For example, an Axis that decomposes its parent (`"/"`), with index `0` and scale `1`: ```@example usepkg axis_mtg_encoding = NodeMTG("/", "Axis", 0, 1) ``` -The we can access the data using the dot syntax: +Then we can access data using dot syntax: ```@example usepkg axis_mtg_encoding.symbol ``` !!! note - [`NodeMTG`](@ref) is the immutable data type, meaning that information cannot be changed once read. By default the package the mutable equivalent called [`MutableNodeMTG`](@ref). Accessing the information of a mutable data structure is slower, but it is more convenient if we need to change its values. + [`NodeMTG`](@ref) is immutable (cannot be changed after creation). + [`MutableNodeMTG`](@ref) can be changed. + Use mutable if you plan to edit node topology fields. ## Learning by example @@ -79,7 +84,7 @@ typeof(mtg) ``` !!! note - The [`Node`](@ref) is a parametric type, that's why `typeof(mtg)` also returns the type used for the MTG data in the node (`MutableNodeMTG`) and the type used for the attributes (`Dict{Symbol, Any}`). But this is not important here. + `typeof(mtg)` shows extra type details (including MTG encoding type and attribute container type). You usually do not need to worry about these details to use the package. We can access the fields of the node using the accessor functions: @@ -122,4 +127,4 @@ index(mtg) ```@example usepkg scale(mtg) -``` \ No newline at end of file +``` diff --git a/docs/src/tutorials/1.manipulate_node.md b/docs/src/tutorials/1.manipulate_node.md index d5c3c2b..9901444 100644 --- a/docs/src/tutorials/1.manipulate_node.md +++ b/docs/src/tutorials/1.manipulate_node.md @@ -29,13 +29,13 @@ typeof(mtg) This node is the root node of the MTG, meaning the first node of the MTG, the one without any parent. !!! note - Root node and leaf node mean a node without any parent or children respectively. These terms are used in the sense of a [tree data structure](https://en.wikipedia.org/wiki/Tree_(data_structure)). + Root node and leaf node mean a node without any parent or children respectively. You can think of this like a family tree: the root is the first ancestor, and leaves are the endpoints. ## The Node type ### What's a node? -As we learned in the previous section, the node is used as the elementary object to build the MTG. In this package, a node is a data structure used to hold these informations (*i.e.* fields). See the [MTG implementation](@ref) section for more details. +As we learned in the previous section, the node is the basic building block of an MTG. In practice, it is just an object that stores information in named fields. See the [MTG implementation](@ref) section for more details. ### Access the fields of a node @@ -53,12 +53,12 @@ These fields are considered internal to the package, and are not meant to be acc getfield(mtg, :MTG) ``` -But the preferred way for accessing such values is to use the accessor functions provided by the package: [`parent`](@ref), [`children`](@ref), [`node_attributes`](@ref), [`node_mtg`](@ref), [`node_id`](@ref). +But the preferred way for accessing such values is to use the accessor functions provided by the package: [`parent`](@ref), [`children`](@ref), [`attribute`](@ref), [`attributes`](@ref), [`node_mtg`](@ref), [`node_id`](@ref). So to get the values of the attributes (*i.e.* the variables), you can use: ```@example usepkg -node_attributes(mtg) +attributes(mtg, format=:dict) ``` The `MTG` field from the node helps us describe the node within the MTG. Let's see what's in it: @@ -152,3 +152,25 @@ get_root(node_5) ### Get the attributes This section has its own tutorial! Head over the next page to learn how to get the nodes attributes. + +## Columnar Attribute Backend + +By default, `read_mtg` now loads attributes using a typed columnar backend (per symbol). The node API remains simple: + +```@example usepkg +leaf = get_node(mtg, 5) +attribute(leaf, :Width) +attribute!(leaf, :new_var, 1.0) +attribute_names(leaf) +attributes(leaf, format=:namedtuple) +``` + +For schema-level operations on one symbol: + +```@example usepkg +add_column!(mtg, :Leaf, :temperature, Float64, default=20.0) +rename_column!(mtg, :Leaf, :temperature, :temp) +drop_column!(mtg, :Leaf, :temp) +``` + +`node_attributes(node)` is still available for compatibility, but new code should prefer `attribute`/`attribute!`/`attributes`. diff --git a/docs/src/tutorials/2.descendants_ancestors_filters.md b/docs/src/tutorials/2.descendants_ancestors_filters.md index 5ba2a50..1420f33 100644 --- a/docs/src/tutorials/2.descendants_ancestors_filters.md +++ b/docs/src/tutorials/2.descendants_ancestors_filters.md @@ -54,7 +54,9 @@ The previous notations are both equivalent to: node_attributes(node_5)[:Length] ``` -But we strongly recommend to avoid this last notation. In our case the attributes are stored in a Dictionary (Dict, the default), so we access their values using the Dict notation: `node_attributes(node_5)[:Length]`. But if the attributes are stored as a NamedTuple-alike structure, we must use the dot notation instead: `node_attributes(node_5).Length` (see [Attributes type](@ref) for more details). That is why the package implements the more generic `node_5[:Length]` notation that works with any structure used for the attributes, which helps develop more generic code. +But we strongly recommend to avoid this last notation. First, in our case the attributes are stored in a Dictionary (Dict, the default), so we access their values using the Dict notation: `node_attributes(node_5)[:Length]`. But if the attributes are stored as a NamedTuple-alike structure, we must use the dot notation instead: `node_attributes(node_5).Length` (see [Attributes type](@ref) for more details). Second, the attributes of the nodes are not stored in the structure returned by `node_attributes`, so updating the value of an attribute using this notation does not update the value in the MTG. This is a common mistake that can lead to very hard-to-debug issues. + + That is why the package implements the more generic `node_5[:Length]` notation that works with any structure used for the attributes, which helps develop more generic code. To get the names of all attributes available in the node subtree, you can use [`get_attributes`](@ref): @@ -96,7 +98,7 @@ For example one could be interested in computing the total length of all nodes i descendants(mtg, :Length) ``` -The `descendants` function visits every children nodes recursively until finding a leaf node. It returns the values in the same order than the visited nodes. +The `descendants` function visits each child, then each child of each child, and so on until leaves. It returns values in the same order as nodes are visited. The function can also help get the nodes directly if we don't pass any attribute: diff --git a/docs/src/tutorials/3.transform_mtg.md b/docs/src/tutorials/3.transform_mtg.md index 2b573c2..d42de4d 100644 --- a/docs/src/tutorials/3.transform_mtg.md +++ b/docs/src/tutorials/3.transform_mtg.md @@ -40,12 +40,14 @@ Here is a summary of the different forms you can use: 1. a `:var_name => :new_var_name` pair. This form is used to rename an attribute name 2. a `:var_name => function => :new_var_name` or `[:var_name1, :var_name2...] => function => :new_var_name`. The variables are declared as a `Symbol` or a `String` (or a vector of), and they are passed as positional arguments to `function`. The new variable name is optional, and is automatically generated if not provided by concatenating the source column name(s) and the function name if any, this form would be used as: `:var_name => function`. 3. a `function => :new_var_name` form that applies a function to a node and puts the results in a new attribute. This form is usually applied when searching ancestors or descendants values. -4. a `function` form that applies a mutating function to a node, without expecting any output. This form is used when using a function that already mutates the node, without the need to return anything, *e.g.* [`branching_order!`](@ref). +4. a `function` form that applies a function that directly changes the node, without expecting any output. This form is used when your function already updates the node by itself, *e.g.* [`branching_order!`](@ref). This tutorial is a deep dive into these different forms. !!! note - All examples use the mutating version [`transform!`](@ref), but there is a non-mutating version too ([`transform`](@ref)). It is used likewise but returns a modified copy of the `mtg`, which is a little bit slower. + All examples use [`transform!`](@ref), which updates the same MTG object in place. + The version without `!` ([`transform`](@ref)) returns a new modified copy instead. + The copy version is usually a little slower, but it keeps your original MTG unchanged. ## Form 1: Rename an attribute @@ -69,7 +71,7 @@ print(get_attributes(mtg)) Yes it did! -The equivalent call with the non-mutating version of transform is: +The equivalent call with the copy-returning version (`transform`, without `!`) is: ```@example usepkg new_mtg = transform(mtg, :Width => :width) @@ -242,7 +244,7 @@ select!(mtg_select, :Length => (x -> x / 10) => :length_m, :Width, ignore_nothin DataFrame(mtg_select) ``` -There is also a non-mutating version of the function: +There is also a copy-returning version of the function (`select`, without `!`): ```@example usepkg mtg_select = select(mtg, :Length => (x -> x / 10) => :length_m, :Width, ignore_nothing = true) diff --git a/docs/src/tutorials/6.add_remove_nodes.md b/docs/src/tutorials/6.add_remove_nodes.md index c8d1ae5..cadfa5b 100644 --- a/docs/src/tutorials/6.add_remove_nodes.md +++ b/docs/src/tutorials/6.add_remove_nodes.md @@ -16,7 +16,7 @@ Create the root node: mtg = Node(MutableNodeMTG("/", "Plant", 0, 1), Dict{Symbol,Any}(:species => "Grassy-plant")) ``` -The first argument to [`Node`](@ref) is its MTG encoding, which describes the topology of the node: what kind of link it has with its parent, its scale, its index and its symbol. It is given as a [`MutableNodeMTG`](@ref) (or a [`NodeMTG`](@ref)). The second argument is used to add attributes to the MTG. +The first argument to [`Node`](@ref) is its MTG description: link with parent, scale, index, and symbol. It is given as a [`MutableNodeMTG`](@ref) (or a [`NodeMTG`](@ref)). The second argument contains node attributes. ### Node id @@ -77,7 +77,7 @@ DataFrame(mtg, get_attributes(mtg)) #### Insertion functions -Adding nodes recursively is easy, but sometimes we want to insert nodes in-between other nodes. We can still use `Node` to do so, but it becomes a bit cumbersome because you'll have to handle manually the changes in parents, children and siblings. +Adding nodes step by step is easy, but sometimes we want to insert nodes in-between other nodes. We can still use `Node` to do so, but it becomes a bit cumbersome because you'll have to handle manually the changes in parents, children and siblings. We provide some helper functions that does it for you instead: @@ -239,7 +239,10 @@ delete_nodes!(mtg_3, filter_fun = node -> node_mtg(node).index >= 1) ``` !!! note - [`delete_nodes!`](@ref) always return the root node of the MTG. If the root node of the original MTG is deleted, its child becomes the new root node. If the root had several children, it returns an error. The function always return the root node of the new MTG, so if the root has not been modified, it remains the same, but if it has been deleted, the new root is returned. That is why it is preferable to use [`delete_nodes!`](@ref) has a non-mutating function and re-assign the results to an object if it is planned to remove the root node. + [`delete_nodes!`](@ref) always returns the root node of the resulting MTG. + If the old root was deleted and had one child, that child becomes the new root. + If the old root had several children, the function returns an error. + In practice, re-assign the result to keep the correct root reference after deletions. As for [`delete_node!`](@ref) (singular), by default [`delete_nodes!`](@ref) (plural) uses [`new_child_link`](@ref) to re-link the children of the deleted nodes, but the user can provide a custom function. See the function details to learn more about it. diff --git a/docs/src/tutorials/7.performance_considerations.md b/docs/src/tutorials/7.performance_considerations.md index d26a0ef..05b2bbe 100644 --- a/docs/src/tutorials/7.performance_considerations.md +++ b/docs/src/tutorials/7.performance_considerations.md @@ -8,13 +8,28 @@ It is important to note that MultiScaleTreeGraph.jl is high-performance by desig ## Performance Tips -### Attribute types +### Vocabulary -The type used to store the attributes in the nodes are completely free, and can have a significant impact on performance. By default, the attributes are stored in a `Dict{Symbol, Any}`. This is a very flexible type that allows for getting attributes by name, updating their values, and adding or deleting attributes. However, this flexibility comes at a cost. The `Dict` type is fast but slower than more optimized types. +This page uses a few technical words. Here is what they mean in MTG terms: -If the user just wants to read an MTG and extract information from it, but not update values or add new attributes, it is recommended to use a `NamedTuple` instead. This will improve performance significantly. +- **Traversal**: visiting nodes one after another in the graph. +- **Request**: asking the MTG for data, for example `descendants(mtg, :Length)` or `ancestors(node, :Width)`. +- **Direct traversal**: reading the graph by following parent/children links directly. +- **Indexed traversal**: using a precomputed lookup table to answer some descendant requests faster. +- **DFS (Depth-First Search)**: one way to visit nodes; it goes deep into one branch before moving to the next branch. -If the user still want to update values, but does not need to repeatedly add or delete attributes, it is recommended to use a `MutableNamedTuple`. This will improve performance significantly over `Dict`s. +### Attribute backend + +By default, `read_mtg` uses `ColumnarStore`, a typed per-symbol columnar backend. This is optimized for traversal + attribute extraction workloads and reduces repeated dictionary lookups in hot loops. + +The explicit attribute API is: + +- `attribute(node, :key; default=nothing)` +- `attribute!(node, :key, value)` +- `attributes(node, format=:namedtuple | :dict)` +- `add_column!`, `drop_column!`, `rename_column!` for schema updates + +`read_mtg` always uses the typed columnar backend. If you build nodes manually, you can still pass `Dict`/`NamedTuple` values, and they are converted automatically to columnar attributes. ### MTG encoding @@ -31,12 +46,12 @@ traverse(mtg, x -> x, symbol=:Internode, link=:<) ### Traversal: node caching -MultiScaleTreeGraph.jl traverses all nodes by default when performing tree traversal. The traversal is done in a recursive manner so it is performant, but not always as fast as it could be. For example, we could have a very large tree with only two leaves at the top. In this case, we would traverse all nodes in the tree, even though we only need to traverse two nodes. +MultiScaleTreeGraph.jl visits all nodes by default when traversing a tree. This is usually fast, but not always optimal. For example, in a very large tree with only two leaves of interest, we still visit every node even though we only need those two leaves. To improve performance, it is possible to cache any type of `traversal`, including any kind of filter on the nodes, and then use the cached traversal instead of the default one. This will improve performance significantly. !!! note - A cache is a data structure that stores the result of a computation so that it can be reused later. In this case, the cache stores a pointer to the nodes from the traversal so that it can be efficiently reused later. This is a common technique to improve performance at the cost of memory, though the memory cost is usually very small. + A cache is simply a saved result that can be reused later. Here, it saves references to the nodes selected by a traversal so the package does not need to re-scan the full tree each time. This usually improves speed at the cost of a small amount of extra memory. To cache a traversal, you can use [`cache_nodes!`](@ref). For example, if you want to cache all the **leaf** nodes in the MTG, you can do: @@ -51,19 +66,42 @@ This will cache all the nodes with the symbol `:Leaf` in the MTG. Then, the tree ### Traversal: descendants values caching -Similarly to caching nodes during tree traversal, the mutating version of [`descendants`](@ref) -[`descendants!`](@ref)- provides a way to cache the values from the descendants of a node. This is useful when the descendants of a node are needed multiple times, as it avoids traversing the tree multiple times. For example, this is useful when computing the total biomass of all wood each segment supports in a tree, as the biomass of a node is the sum of the biomass of its descendants. +Similarly to caching nodes during tree traversal, [`descendants!`](@ref) (the `!` means "update in place") provides a way to cache values from descendants of a node. This is useful when these values are needed many times, because it avoids traversing the tree repeatedly. For example, this is useful when computing total biomass supported by each segment. ### In-place traversal outputs -For repeated queries on large trees, prefer reusing buffers with in-place methods to reduce allocations: +For repeated requests on large trees, prefer reusing buffers with in-place methods to reduce allocations: ```julia vals = Float64[] nodes = typeof(mtg)[] -descendants!(vals, mtg, :Length, type=Float64) -ancestors!(vals, get_node(mtg, 5), :Length, type=Float64) +descendants!(vals, mtg, :Length) +ancestors!(vals, get_node(mtg, 5), :Length) descendants!(nodes, mtg, self=true) ``` -This pattern is especially useful when the same query is executed many times (e.g. across millions of leaves). +This pattern is especially useful when the same request is executed many times (e.g. across millions of leaves). + +`type=` for `descendants`/`ancestors` is deprecated because return eltypes are inferred automatically from typed columns. + +### Hybrid descendants backend (growth-safe) + +For `descendants(node, key, ...)` on columnar MTGs, the package supports: + +- `:auto` (default): use direct graph traversal when the MTG is changing a lot, and switch to indexed traversal when read requests dominate +- `:pointer`: always use direct graph traversal +- `:indexed`: always use the index (rebuilding it when structure changed) + +In other words: + +- If your simulation is currently **growing a lot** (many inserted/deleted organs), `:pointer` behavior is usually best. +- If your simulation is mostly **reading values repeatedly** from a mostly stable structure, `:indexed` behavior can be faster. +- `:auto` tries to choose between both automatically. + +```julia +descendants_strategy(mtg) # :auto +descendants_strategy!(mtg, :pointer) +descendants_strategy!(mtg, :indexed) +descendants_strategy!(mtg, :auto) +``` diff --git a/src/MultiScaleTreeGraph.jl b/src/MultiScaleTreeGraph.jl index f25a5f7..cad7a61 100644 --- a/src/MultiScaleTreeGraph.jl +++ b/src/MultiScaleTreeGraph.jl @@ -12,11 +12,13 @@ import DataFrames: DataFrame, insertcols! import DataFrames: transform!, transform # We define our own version for transforming the MTG import DataFrames: select!, select # We define our own version for transforming the MTG import DataFrames: rename! # We define our own version for renaming node attributes +import Tables import MetaGraphsNext: MetaGraph, code_for, add_edge! # Transform to MetaGraph import Graphs.DiGraph import Dates: Date, @dateformat_str, format include("types/AbstractNodeMTG.jl") +include("types/Attributes.jl") include("types/Node.jl") include("read_MTG/read_MTG.jl") include("read_MTG/strip_comments.jl") @@ -35,6 +37,7 @@ include("compute_MTG/check_filters.jl") include("compute_MTG/mutation.jl") include("compute_MTG/append_attributes.jl") include("compute_MTG/traverse.jl") +include("compute_MTG/columnarize.jl") include("compute_MTG/transform.jl") include("compute_MTG/select.jl") include("compute_MTG/delete_nodes.jl") @@ -48,6 +51,7 @@ include("compute_MTG/nleaves.jl") include("compute_MTG/pipe_model.jl") include("compute_MTG/get_node.jl") include("conversion/DataFrame.jl") +include("conversion/Tables.jl") include("conversion/MetaGraph.jl") export read_mtg @@ -91,6 +95,13 @@ export names, scales, symbols, components export node_id, node_mtg, node_attributes export symbol, scale, index, link export symbol!, scale!, index!, link! +export ColumnarStore +export Column, SymbolBucket, MTGAttributeStore, NodeAttrRef +export attribute, attribute!, attributes, attribute_names +export add_column!, drop_column!, rename_column! +export descendants_strategy, descendants_strategy! +export columnarize! +export symbol_table, mtg_table export list_nodes export get_classes export get_description diff --git a/src/compute_MTG/ancestors.jl b/src/compute_MTG/ancestors.jl index 3c74c55..3d7e243 100644 --- a/src/compute_MTG/ancestors.jl +++ b/src/compute_MTG/ancestors.jl @@ -26,14 +26,7 @@ is filtered out (`false`). `recursivity_level = 2`. If a negative value is provided (the default), the function returns all valid values from the node to the root. - `ignore_nothing = false`: filter-out the nodes with `nothing` values for the given `key` -- `type::Union{Union,DataType}`: The type of the attribute. Makes the function run much -faster if provided (≈4x faster). - -# Note - -In most cases, the `type` argument should be given as a union of `Nothing` and the data type -of the attribute to manage missing or inexistant data, e.g. measurements made at one scale -only. See examples for more details. +- `type::Union{Union,DataType}`: Deprecated. Return types are inferred automatically. # Examples @@ -45,15 +38,11 @@ mtg = read_mtg(file) # Using a leaf node from the mtg: leaf_node = get_node(mtg, 5) -ancestors(leaf_node, :Length) # Short to write, but slower to execute - -# Fast version, note that we pass a union of Nothing and Float64 because there are some nodes -# without a `Length` attribute: -ancestors(leaf_node, :Length, type = Union{Nothing,Float64}) +ancestors(leaf_node, :Length) # Filter by scale: -ancestors(leaf_node, :XX, scale = 1, type = Float64) -ancestors(leaf_node, :Length, scale = 3, type = Float64) +ancestors(leaf_node, :XX, scale = 1) +ancestors(leaf_node, :Length, scale = 3, ignore_nothing=true) # Filter by symbol: ancestors(leaf_node, :Length, symbol = :Internode) @@ -73,6 +62,7 @@ function ancestors( type::Union{Union,DataType}=Any) symbol = normalize_symbol_filter(symbol) link = normalize_link_filter(link) + _maybe_depwarn_traversal_type_kw(:ancestors, type) # Check the filters once, and then compute the ancestors recursively using `ancestors_` check_filters(node, scale=scale, symbol=symbol, link=link) @@ -81,12 +71,14 @@ function ancestors( filter_fun_ = filter_fun_nothing(filter_fun, ignore_nothing, key) use_no_filter = no_node_filters(scale, symbol, link, filter_fun_) - val = Array{type,1}() + out_type = type === Any ? infer_columnar_attr_type(node, Symbol(key), symbol, ignore_nothing) : type + val = Array{out_type,1}() + key_plan = build_columnar_query_plan(node, Symbol(key)) # Put the recursivity level into an array so it is mutable in-place: if self if use_no_filter || is_filtered(node, scale, symbol, link, filter_fun_) - val_ = unsafe_getindex(node, key) + val_ = unsafe_getindex(node, key, key_plan) push!(val, val_) elseif !all # We don't keep the value and we have to stop at the first filtered-out value @@ -95,15 +87,15 @@ function ancestors( end if use_no_filter - ancestors_values_no_filter!(node, key, val, recursivity_level) + ancestors_values_no_filter!(node, key, val, recursivity_level, key_plan) else - ancestors_values!(node, key, scale, symbol, link, all, filter_fun_, val, recursivity_level) + ancestors_values!(node, key, scale, symbol, link, all, filter_fun_, val, recursivity_level, key_plan) end return val end -function ancestors_values!(node, key, scale, symbol, link, all, filter_fun, val, recursivity_level) +function ancestors_values!(node, key, scale, symbol, link, all, filter_fun, val, recursivity_level, key_plan=nothing) current = node remaining = recursivity_level @@ -112,7 +104,7 @@ function ancestors_values!(node, key, scale, symbol, link, all, filter_fun, val, keep = is_filtered(parent_, scale, symbol, link, filter_fun) if keep - push!(val, unsafe_getindex(parent_, key)) + push!(val, unsafe_getindex(parent_, key, key_plan)) # Only decrement the recursivity level when the current node is not filtered-out remaining -= 1 end @@ -124,13 +116,13 @@ function ancestors_values!(node, key, scale, symbol, link, all, filter_fun, val, return val end -function ancestors_values_no_filter!(node, key, val, recursivity_level) +function ancestors_values_no_filter!(node, key, val, recursivity_level, key_plan=nothing) current = node remaining = recursivity_level while !isroot(current) && remaining != 0 parent_ = parent(current) - push!(val, unsafe_getindex(parent_, key)) + push!(val, unsafe_getindex(parent_, key, key_plan)) remaining -= 1 current = parent_ end @@ -225,23 +217,25 @@ function ancestors!( ) symbol = normalize_symbol_filter(symbol) link = normalize_link_filter(link) + _maybe_depwarn_traversal_type_kw(:ancestors!, type) check_filters(node, scale=scale, symbol=symbol, link=link) filter_fun_ = filter_fun_nothing(filter_fun, ignore_nothing, key) use_no_filter = no_node_filters(scale, symbol, link, filter_fun_) + key_plan = build_columnar_query_plan(node, Symbol(key)) empty!(out) if self if use_no_filter || is_filtered(node, scale, symbol, link, filter_fun_) - push!(out, unsafe_getindex(node, key)) + push!(out, unsafe_getindex(node, key, key_plan)) elseif !all return out end end if use_no_filter - ancestors_values_no_filter!(node, key, out, recursivity_level) + ancestors_values_no_filter!(node, key, out, recursivity_level, key_plan) else - ancestors_values!(node, key, scale, symbol, link, all, filter_fun_, out, recursivity_level) + ancestors_values!(node, key, scale, symbol, link, all, filter_fun_, out, recursivity_level, key_plan) end return out end diff --git a/src/compute_MTG/append_attributes.jl b/src/compute_MTG/append_attributes.jl index cf51a5e..1201ebe 100755 --- a/src/compute_MTG/append_attributes.jl +++ b/src/compute_MTG/append_attributes.jl @@ -20,6 +20,13 @@ function Base.append!(node::Node{M,T}, attr::T) where {M<:AbstractNodeMTG,T<:Abs merge!(node_attributes(node), attr) end +function Base.append!(node::Node{<:AbstractNodeMTG,ColumnarAttrs}, attr) + for (k, v) in pairs(attr) + attribute!(node, k, v) + end + return node +end + # And ensure compatibility between both so a script wouldn't be broken if we just change the # type of the attributes: function Base.append!(node::Node{<:AbstractNodeMTG,<:AbstractDict}, attr) @@ -46,6 +53,10 @@ function Base.pop!(node::Node{<:AbstractNodeMTG,<:AbstractDict}, key) return poped_value end +function Base.pop!(node::Node{<:AbstractNodeMTG,ColumnarAttrs}, key) + pop!(node_attributes(node), key, nothing) +end + # Renaming attributes: function rename!(node::Node{M,T}, old_new) where {M<:AbstractNodeMTG,T<:MutableNamedTuple} attr_keys = replace([i for i in keys(node_attributes(node))], old_new) @@ -63,3 +74,7 @@ function rename!(node::Node{<:AbstractNodeMTG,<:AbstractDict}, old_new) first(kv) == first(old_new) ? last(old_new) => last(kv) : kv end end + +function rename!(node::Node{<:AbstractNodeMTG,ColumnarAttrs}, old_new) + rename_column!(node, symbol(node), Symbol(first(old_new)), Symbol(last(old_new))) +end diff --git a/src/compute_MTG/caching.jl b/src/compute_MTG/caching.jl index 88b9278..a0085b7 100644 --- a/src/compute_MTG/caching.jl +++ b/src/compute_MTG/caching.jl @@ -41,7 +41,7 @@ when traversing using [`traverse!`](@ref) or [`transform!`](@ref). ```julia file = joinpath(dirname(dirname(pathof(MultiScaleTreeGraph))), "test", "files", "simple_plant.mtg") -mtg = read_mtg(file, Dict) +mtg = read_mtg(file) # Cache all leaf nodes: cache_nodes!(mtg, symbol=:Leaf) diff --git a/src/compute_MTG/columnarize.jl b/src/compute_MTG/columnarize.jl new file mode 100644 index 0000000..f718297 --- /dev/null +++ b/src/compute_MTG/columnarize.jl @@ -0,0 +1,25 @@ +""" + columnarize!(mtg::Node) + +Bind all node attributes to a single `MTGAttributeStore`. +""" +function columnarize!(mtg::Node) + nodes = traverse(mtg, node -> node, type=typeof(mtg)) + isempty(nodes) && return mtg + + store = MTGAttributeStore() + for n in nodes + attrs = node_attributes(n) + attrs isa ColumnarAttrs || error("columnarize! expects nodes with ColumnarAttrs attributes.") + raw = _isbound(attrs) ? Dict{Symbol,Any}(pairs(attrs)) : attrs.staged + _add_node_with_attrs!(store, node_id(n), symbol(n), raw) + end + + for n in nodes + attrs = node_attributes(n)::ColumnarAttrs + attrs.ref.store = store + attrs.ref.node_id = node_id(n) + empty!(attrs.staged) + end + return mtg +end diff --git a/src/compute_MTG/delete_nodes.jl b/src/compute_MTG/delete_nodes.jl index 007b46d..cd07aef 100644 --- a/src/compute_MTG/delete_nodes.jl +++ b/src/compute_MTG/delete_nodes.jl @@ -150,6 +150,8 @@ function delete_node!(node::Node{N,A}; child_link_fun=new_child_link) where {N<: end # Clean the old deleted node (isolate it, no parent, no children): + attrs = node_attributes(node) + attrs isa ColumnarAttrs && remove_columnar_node!(attrs) reparent!(node, nothing) rechildren!(node, Node{N,A}[]) diff --git a/src/compute_MTG/descendants.jl b/src/compute_MTG/descendants.jl index d9fa706..093dfc4 100644 --- a/src/compute_MTG/descendants.jl +++ b/src/compute_MTG/descendants.jl @@ -1,27 +1,101 @@ -function collect_descendant_values!(node, key, scale, symbol, link, filter_fun, all, val, recursivity_level) +@inline function _maybe_depwarn_traversal_type_kw(fname::Symbol, type) + type === Any && return nothing + Base.depwarn( + "Keyword argument `type` in `$fname` is deprecated and will be removed in a future release. " * + "Return types are inferred automatically from the columnar attribute store; remove `type` " * + "and use `ignore_nothing=true` when you want `nothing` values filtered out.", + fname, + ) + return nothing +end + +@inline function _descendants_index_compatible(scale, link, all::Bool, filter_fun, recursivity_level) + scale === nothing || return false + link === nothing || return false + all || return false + filter_fun === nothing || return false + return isinf(recursivity_level) || recursivity_level < 0 +end + +@inline function _bucket_allow_mask(store::MTGAttributeStore, symbol_filter) + symbol_filter === nothing && return nothing + mask = falses(length(store.buckets)) + for bid in _symbol_bucket_ids(store, symbol_filter) + mask[bid] = true + end + return mask +end + +function _collect_descendant_values_indexed!( + out::AbstractVector, + node, + key::Symbol, + key_plan::ColumnarQueryPlan, + symbol_filter, + self::Bool, + ignore_nothing::Bool, +) + store = _columnar_store(node) + store === nothing && return false + _prepare_subtree_index!(store, get_root(node)) || return false + idx = store.subtree_index + + nid0 = node_id(node) + nid0 > length(idx.tin) && return false + left = idx.tin[nid0] + right = idx.tout[nid0] + left == 0 && return false + self || (left += 1) + left > right && return true + + allow_mask = _bucket_allow_mask(store, symbol_filter) + + @inbounds for i in left:right + nid = idx.dfs_order[i] + bid = store.node_bucket[nid] + bid == 0 && continue + allow_mask === nothing || allow_mask[bid] || continue + + col_idx = key_plan.col_idx_by_bucket[bid] + if col_idx == 0 + ignore_nothing && continue + push!(out, nothing) + continue + end + + row = store.node_row[nid] + v = store.buckets[bid].columns[col_idx].data[row] + ignore_nothing && v === nothing && continue + push!(out, v) + end + + return true +end + +function collect_descendant_values!(node, key, scale, symbol, link, filter_fun, all, val, recursivity_level, key_plan=nothing) recursivity_level == 0 && return val recursivity_level -= 1 keep = is_filtered(node, scale, symbol, link, filter_fun) if keep - push!(val, unsafe_getindex(node, key)) + push!(val, unsafe_getindex(node, key, key_plan)) elseif !all return val end @inbounds for chnode in children(node) - collect_descendant_values!(chnode, key, scale, symbol, link, filter_fun, all, val, recursivity_level) + collect_descendant_values!(chnode, key, scale, symbol, link, filter_fun, all, val, recursivity_level, key_plan) end return val end -function collect_descendant_values_no_filter!(node, key, val, recursivity_level) +function collect_descendant_values_no_filter!(node, key, val, recursivity_level, key_plan=nothing) recursivity_level == 0 && return val recursivity_level -= 1 - push!(val, unsafe_getindex(node, key)) + push!(val, unsafe_getindex(node, key, key_plan)) @inbounds for chnode in children(node) - collect_descendant_values_no_filter!(chnode, key, val, recursivity_level) + collect_descendant_values_no_filter!(chnode, key, val, recursivity_level, key_plan) end return val end @@ -67,29 +141,39 @@ function descendants( type::Union{Union,DataType}=Any) symbol = normalize_symbol_filter(symbol) link = normalize_link_filter(link) + _maybe_depwarn_traversal_type_kw(:descendants, type) # Check the filters once, and then compute the descendants recursively using `descendants_` check_filters(node, scale=scale, symbol=symbol, link=link) - # Change the filtering function if we also want to remove nodes with nothing values: - filter_fun_ = filter_fun_nothing(filter_fun, ignore_nothing, key) + key_ = Symbol(key) + out_type = type === Any ? infer_columnar_attr_type(node, key_, symbol, ignore_nothing) : type + val = Array{out_type,1}() + key_plan = build_columnar_query_plan(node, key_) - val = Array{type,1}() + if key_plan isa ColumnarQueryPlan && + _descendants_index_compatible(scale, link, all, filter_fun, recursivity_level) && + _collect_descendant_values_indexed!(val, node, key_, key_plan, symbol, self, ignore_nothing) + return val + end + + # Change the filtering function if we also want to remove nodes with nothing values: + filter_fun_ = filter_fun_nothing(filter_fun, ignore_nothing, key_) use_no_filter = no_node_filters(scale, symbol, link, filter_fun_) if self if use_no_filter - collect_descendant_values_no_filter!(node, key, val, recursivity_level) + collect_descendant_values_no_filter!(node, key_, val, recursivity_level, key_plan) else - collect_descendant_values!(node, key, scale, symbol, link, filter_fun_, all, val, recursivity_level) + collect_descendant_values!(node, key_, scale, symbol, link, filter_fun_, all, val, recursivity_level, key_plan) end else # If we don't want to include the value of the current node, we apply the traversal to its children directly: for chnode in children(node) if use_no_filter - collect_descendant_values_no_filter!(chnode, key, val, recursivity_level) + collect_descendant_values_no_filter!(chnode, key_, val, recursivity_level, key_plan) else - collect_descendant_values!(chnode, key, scale, symbol, link, filter_fun_, all, val, recursivity_level) + collect_descendant_values!(chnode, key_, scale, symbol, link, filter_fun_, all, val, recursivity_level, key_plan) end end end @@ -152,23 +236,33 @@ function descendants!( ) symbol = normalize_symbol_filter(symbol) link = normalize_link_filter(link) + _maybe_depwarn_traversal_type_kw(:descendants!, type) check_filters(node, scale=scale, symbol=symbol, link=link) - filter_fun_ = filter_fun_nothing(filter_fun, ignore_nothing, key) - use_no_filter = no_node_filters(scale, symbol, link, filter_fun_) + key_ = Symbol(key) + key_plan = build_columnar_query_plan(node, key_) empty!(out) + if key_plan isa ColumnarQueryPlan && + _descendants_index_compatible(scale, link, all, filter_fun, recursivity_level) && + _collect_descendant_values_indexed!(out, node, key_, key_plan, symbol, self, ignore_nothing) + return out + end + + filter_fun_ = filter_fun_nothing(filter_fun, ignore_nothing, key_) + use_no_filter = no_node_filters(scale, symbol, link, filter_fun_) + if self if use_no_filter - collect_descendant_values_no_filter!(node, key, out, recursivity_level) + collect_descendant_values_no_filter!(node, key_, out, recursivity_level, key_plan) else - collect_descendant_values!(node, key, scale, symbol, link, filter_fun_, all, out, recursivity_level) + collect_descendant_values!(node, key_, scale, symbol, link, filter_fun_, all, out, recursivity_level, key_plan) end else for chnode in children(node) if use_no_filter - collect_descendant_values_no_filter!(chnode, key, out, recursivity_level) + collect_descendant_values_no_filter!(chnode, key_, out, recursivity_level, key_plan) else - collect_descendant_values!(chnode, key, scale, symbol, link, filter_fun_, all, out, recursivity_level) + collect_descendant_values!(chnode, key_, scale, symbol, link, filter_fun_, all, out, recursivity_level, key_plan) end end end @@ -224,6 +318,7 @@ function descendants!( type::Union{Union,DataType}=Any) where {N,A<:AbstractDict} symbol = normalize_symbol_filter(symbol) link = normalize_link_filter(link) + _maybe_depwarn_traversal_type_kw(:descendants!, type) # Check the filters once, and then compute the descendants recursively using `descendants_` check_filters(node, scale=scale, symbol=symbol, link=link) @@ -323,8 +418,7 @@ is filtered out (`false`). grand-children: `recursivity_level = 2`. If `Inf` (the default) or a negative value is provided, there is no recursion limitation. - `ignore_nothing = false`: filter-out the nodes with `nothing` values for the given `key` -- `type::Union{Union,DataType}`: The type of the attribute. Can make the function run much -faster if provided (*e.g.* ≈4x faster). +- `type::Union{Union,DataType}`: Deprecated. Return types are inferred automatically. # Tips @@ -332,12 +426,6 @@ faster if provided (*e.g.* ≈4x faster). To get the values of the leaves use [`isleaf`](@ref) as the filtering function, e.g.: `descendants(mtg, :Width; filter_fun = isleaf)`. -# Note - -In most cases, the `type` argument should be given as a union of `Nothing` and the data type -of the attribute to manage missing or inexistant data, e.g. measurements made at one scale -only. See examples for more details. - # Examples ```julia @@ -345,15 +433,11 @@ only. See examples for more details. file = joinpath(dirname(dirname(pathof(MultiScaleTreeGraph))),"test","files","simple_plant.mtg") mtg = read_mtg(file) -descendants(mtg, :Length) # Short to write, but slower to execute - -# Fast version, note that we pass a union of Nothing and Float64 because there are some nodes -# without a `Length` attribute: -descendants(mtg, :Length, type = Union{Nothing,Float64}) +descendants(mtg, :Length) # Filter by scale: -descendants(mtg, :XEuler, scale = 3, type = Union{Nothing, Float64}) -descendants(mtg, :Length, scale = 3, type = Float64) # No `nothing` value here, no need of a union type +descendants(mtg, :XEuler, scale = 3) +descendants(mtg, :Length, scale = 3, ignore_nothing=true) # No `nothing` in output # Filter by symbol: descendants(mtg, :Length, symbol = :Leaf) diff --git a/src/compute_MTG/indexing.jl b/src/compute_MTG/indexing.jl index 8722cb8..c932051 100644 --- a/src/compute_MTG/indexing.jl +++ b/src/compute_MTG/indexing.jl @@ -64,6 +64,24 @@ function unsafe_getindex(node::Node{M,T} where {M<:AbstractNodeMTG,T<:AbstractDi unsafe_getindex(node, Symbol(key)) end +@inline function unsafe_getindex(node::Node{<:AbstractNodeMTG,ColumnarAttrs}, key::Symbol, plan::ColumnarQueryPlan) + attrs = node_attributes(node) + store = attrs.ref.store + store === nothing && return nothing + nodeid = node_id(node) + nodeid > length(store.node_bucket) && return nothing + bid = store.node_bucket[nodeid] + bid == 0 && return nothing + col_idx = plan.col_idx_by_bucket[bid] + col_idx == 0 && return nothing + row = store.node_row[nodeid] + col = store.buckets[bid].columns[col_idx] + return col.data[row] +end + +@inline unsafe_getindex(node::Node, key::Symbol, plan) = unsafe_getindex(node, key) +@inline unsafe_getindex(node::Node, key, plan) = unsafe_getindex(node, Symbol(key), plan) + """ Returns the length of the subtree below the node (including it) """ diff --git a/src/compute_MTG/insert_nodes.jl b/src/compute_MTG/insert_nodes.jl index d085ace..89cbad5 100644 --- a/src/compute_MTG/insert_nodes.jl +++ b/src/compute_MTG/insert_nodes.jl @@ -266,6 +266,23 @@ insert_parent!( """ insert_parent!, insert_generation!, insert_child!, insert_sibling! +@inline function _coerce_insert_attrs(::Type{A}, attrs) where {A} + attrs +end + +@inline function _coerce_insert_attrs(::Type{ColumnarAttrs}, attrs) + _to_columnar_attrs(attrs) +end + +@inline function _bind_inserted_columnar!(::Type{A}, parent_for_store::Node, inserted::Node) where {A} + nothing +end + +function _bind_inserted_columnar!(::Type{ColumnarAttrs}, parent_for_store::Node, inserted::Node) + bind_columnar_child!(node_attributes(parent_for_store), node_attributes(inserted), node_id(inserted), symbol(inserted)) + return nothing +end + function insert_parent!(node::Node{N,A}, template, attr_fun=node -> A(), maxid=[max_id(node)]) where {N<:AbstractNodeMTG,A} maxid[1] += 1 @@ -277,9 +294,10 @@ function insert_parent!(node::Node{N,A}, template, attr_fun=node -> A(), maxid=[ nothing, Node{N,A}[node], new_node_MTG(node, template), - copy(attr_fun(node)), + _coerce_insert_attrs(A, copy(attr_fun(node))), Dict{String,Vector{Node{N,A}}}() ) + _bind_inserted_columnar!(A, node, new_node) # Add to the new root the mandatory root attributes: root_attrs = Dict( @@ -298,9 +316,10 @@ function insert_parent!(node::Node{N,A}, template, attr_fun=node -> A(), maxid=[ parent(node), Node{N,A}[node], new_node_MTG(node, template), - copy(attr_fun(node)), + _coerce_insert_attrs(A, copy(attr_fun(node))), Dict{String,Vector{Node{N,A}}}() ) + _bind_inserted_columnar!(A, parent(node), new_node) # Add the new node to the parent: deleteat!(children(parent(node)), findfirst(x -> node_id(x) == node_id(node), children(parent(node)))) @@ -336,9 +355,10 @@ function insert_sibling!(node::Node{N,A}, template, attr_fun=node -> A(), maxid= parent(node), Vector{Node{N,A}}(), new_node_MTG(node, template), - copy(attr_fun(node)), + _coerce_insert_attrs(A, copy(attr_fun(node))), Dict{String,Vector{Node{N,A}}}() ) + _bind_inserted_columnar!(A, parent(node), new_node) # Add the new node to the children of the parent node: push!(children(parent(node)), new_node) @@ -355,9 +375,10 @@ function insert_generation!(node::Node{N,A}, template, attr_fun=node -> A(), max node, children(node), new_node_MTG(node, template), - copy(attr_fun(node)), + _coerce_insert_attrs(A, copy(attr_fun(node))), Dict{String,Vector{Node{N,A}}}() ) + _bind_inserted_columnar!(A, node, new_node) # Add the new node as the only child of the node: rechildren!(node, Node{N,A}[new_node]) diff --git a/src/compute_MTG/prune.jl b/src/compute_MTG/prune.jl index 7df3603..140aa1c 100644 --- a/src/compute_MTG/prune.jl +++ b/src/compute_MTG/prune.jl @@ -23,6 +23,11 @@ function prune!(node) else parent_node = parent(node) + traverse!(node) do n + attrs = node_attributes(n) + attrs isa ColumnarAttrs && remove_columnar_node!(attrs) + end + # Delete the node as child of his parent: deleteat!(children(parent_node), findfirst(x -> node_id(x) == node_id(node), children(parent_node))) end diff --git a/src/conversion/Tables.jl b/src/conversion/Tables.jl new file mode 100644 index 0000000..f1f09ec --- /dev/null +++ b/src/conversion/Tables.jl @@ -0,0 +1,114 @@ +struct MTGTableView + names::Vector{Symbol} + name_to_idx::Dict{Symbol,Int} + cols::Vector{AbstractVector} +end + +function MTGTableView(names::Vector{Symbol}, cols::Vector{AbstractVector}) + idx = Dict{Symbol,Int}() + @inbounds for i in eachindex(names) + idx[names[i]] = i + end + MTGTableView(names, idx, cols) +end + +Tables.istable(::Type{MTGTableView}) = true +Tables.columnaccess(::Type{MTGTableView}) = true +Tables.columns(t::MTGTableView) = t +Tables.columnnames(t::MTGTableView) = Tuple(t.names) +Tables.getcolumn(t::MTGTableView, i::Int) = t.cols[i] +Tables.getcolumn(t::MTGTableView, name::Symbol) = t.cols[t.name_to_idx[name]] +Tables.schema(t::MTGTableView) = Tables.Schema(Tuple(t.names), Tuple(eltype.(t.cols))) + +@inline function _value_type_or_missing(values::Vector{Any}) + T = Union{} + has_missing = false + @inbounds for v in values + if v === missing + has_missing = true + else + T = T === Union{} ? typeof(v) : typejoin(T, typeof(v)) + end + end + + if T === Union{} + return Missing + end + return has_missing ? Union{Missing,T} : T +end + +function _typed_column(values::Vector{Any}) + T = _value_type_or_missing(values) + out = Vector{T}(undef, length(values)) + @inbounds for i in eachindex(values) + out[i] = values[i] + end + out +end + +function _collect_attr_names(nodes) + out = Symbol[] + seen = Set{Symbol}() + for n in nodes + for key in attribute_names(n) + if !(key in seen) + push!(seen, key) + push!(out, key) + end + end + end + out +end + +function _build_attr_columns(nodes, attr_names) + cols = Vector{AbstractVector}(undef, length(attr_names)) + @inbounds for i in eachindex(attr_names) + key = attr_names[i] + values = Vector{Any}(undef, length(nodes)) + for j in eachindex(nodes) + v = attribute(nodes[j], key, default=nothing) + values[j] = v === nothing ? missing : v + end + cols[i] = _typed_column(values) + end + cols +end + +function symbol_table(mtg::Node, symbol::Symbol) + nodes = traverse(mtg, node -> node, symbol=symbol, type=typeof(mtg)) + attr_names = _collect_attr_names(nodes) + attr_cols = _build_attr_columns(nodes, attr_names) + + names = Symbol[:node_id] + cols = Vector{AbstractVector}(undef, 1 + length(attr_cols)) + cols[1] = [node_id(n) for n in nodes] + + @inbounds for i in eachindex(attr_names) + push!(names, attr_names[i]) + cols[i+1] = attr_cols[i] + end + + MTGTableView(names, cols) +end + +function mtg_table(mtg::Node) + nodes = traverse(mtg, node -> node, type=typeof(mtg)) + attr_names = _collect_attr_names(nodes) + attr_cols = _build_attr_columns(nodes, attr_names) + + names = Symbol[:node_id, :symbol, :scale, :index, :link, :parent_id] + cols = Vector{AbstractVector}(undef, 6 + length(attr_cols)) + cols[1] = [node_id(n) for n in nodes] + cols[2] = [symbol(n) for n in nodes] + cols[3] = [scale(n) for n in nodes] + cols[4] = [index(n) for n in nodes] + cols[5] = [link(n) for n in nodes] + cols[6] = [isroot(n) ? missing : node_id(parent(n)) for n in nodes] + + @inbounds for i in eachindex(attr_names) + push!(names, attr_names[i]) + cols[i+6] = attr_cols[i] + end + + MTGTableView(names, cols) +end diff --git a/src/read_MTG/parse_mtg.jl b/src/read_MTG/parse_mtg.jl index 17a7d8a..c7239b4 100644 --- a/src/read_MTG/parse_mtg.jl +++ b/src/read_MTG/parse_mtg.jl @@ -19,7 +19,7 @@ The buffered IO stream (`f`) should start at the line of the section. The parsed MTG section """ -function parse_mtg!(f, classes, features, line, l, attr_type, mtg_type) +function parse_mtg!(f, classes, features, line, l, mtg_type) l[1] = next_line!(f, line) if length(l[1]) == 0 @@ -78,7 +78,7 @@ function parse_mtg!(f, classes, features, line, l, attr_type, mtg_type) while !eof(f) l[1] = next_line!(f, line; whitespace=false) length(l[1]) == 0 && continue # ignore empty line - parse_line_to_node!(tree_dict, l, line, attr_column_start, last_node_column, node_id, attr_type, mtg_type, features, classes) + parse_line_to_node!(tree_dict, l, line, attr_column_start, last_node_column, node_id, mtg_type, features, classes) end catch e error( @@ -132,7 +132,6 @@ Parse MTG node attributes names, values and type # Arguments - `node_data::String`: A splitted mtg node data (attributes) -- `attr_type::DataType`: the type of the structure used to hold the attributes - `features::DataFrame`: The features data.frame - `attr_column_start::Integer`: The index of the column of the first attribute - `line::Integer`: The current line of the mtg file @@ -143,10 +142,10 @@ Parse MTG node attributes names, values and type A list of attributes """ -function parse_MTG_node_attr(node_data, attr_type, features, attr_column_start, line; force=false) +function parse_MTG_node_attr(node_data, features, attr_column_start, line; force=false) if length(node_data) < attr_column_start - return init_empty_attr(attr_type) + return init_empty_attr() end node_data_attr = node_data[attr_column_start:end] @@ -217,47 +216,26 @@ function parse_MTG_node_attr(node_data, attr_type, features, attr_column_start, end end - parse_node_attributes(attr_type, node_attr) + parse_node_attributes(node_attr) end """ - -Instantiate a `attr_type` struct with `node_attr` keys and values - -# Arguments - -- `attr_type::DataType`: the type of the structure used to hold the attributes -- `node_attr::String`: The node attributes as a `Dict` +Instantiate a `ColumnarAttrs` struct with `node_attr` keys and values. """ -function parse_node_attributes(attr_type::Type{T}, node_attr) where {T<:Union{NamedTuple,MutableNamedTuple}} - attr_type{tuple(Symbol.(keys(node_attr))...)}(tuple(values(node_attr)...)) -end - -function parse_node_attributes(attr_type::Type{T}, node_attr) where {T<:Union{AbstractDict}} - Dict{Symbol,Any}(zip(Symbol.(keys(node_attr)), values(node_attr))) +function parse_node_attributes(node_attr) + ColumnarAttrs(Dict{Symbol,Any}(zip(Symbol.(keys(node_attr)), values(node_attr)))) end -# node_attributes for DataFrame -function parse_node_attributes(::Type{DataFrame}, node_attr) - DataFrame(node_attr) -end - -function init_empty_attr(attr_type) - attr_type() -end - -function init_empty_attr(attr_type::Type{T}) where {T<:Union{AbstractDict}} - attr_type{Symbol,Any}() -end +init_empty_attr() = ColumnarAttrs() """ - parse_line_to_node!(tree_dict, l, line, attr_column_start, node_id, attr_type, mtg_type, features,classes) + parse_line_to_node!(tree_dict, l, line, attr_column_start, node_id, mtg_type, features,classes) Parse a line of the MTG file to a node and add it to the tree dictionary. It may also add several nodes if the line contains several MTG elements. """ -function parse_line_to_node!(tree_dict, l, line, attr_column_start, last_node_column, node_id, attr_type, mtg_type, features, classes) +function parse_line_to_node!(tree_dict, l, line, attr_column_start, last_node_column, node_id, mtg_type, features, classes) splitted_MTG = split(l[1], "\t") node_column = findfirst(x -> length(x) > 0, splitted_MTG) @@ -281,10 +259,10 @@ function parse_line_to_node!(tree_dict, l, line, attr_column_start, last_node_co # Get node attributes: if features !== nothing - node_attr = parse_MTG_node_attr(node_data, attr_type, features, node_attr_column_start, line) + node_attr = parse_MTG_node_attr(node_data, features, node_attr_column_start, line) else # if there are no attribute in the MTG, we create an empty attribute: - node_attr = init_empty_attr(attr_type) + node_attr = init_empty_attr() end if node[1] == "^" @@ -329,7 +307,7 @@ function parse_line_to_node!(tree_dict, l, line, attr_column_start, last_node_co if k == length(node) || findfirst(x -> x == k, shared) !== nothing node_k_attr = node_attr else - node_k_attr = init_empty_attr(attr_type) + node_k_attr = init_empty_attr() end if k == minimum(building_nodes) @@ -383,4 +361,4 @@ function parse_line_to_node!(tree_dict, l, line, attr_column_start, last_node_co # Increment node unique ID: node_id[1] = node_id[1] + 1 end -end \ No newline at end of file +end diff --git a/src/read_MTG/read_MTG.jl b/src/read_MTG/read_MTG.jl index 1a01d8a..fbfb52a 100644 --- a/src/read_MTG/read_MTG.jl +++ b/src/read_MTG/read_MTG.jl @@ -1,12 +1,11 @@ """ - read_mtg(file, attr_type = Dict, mtg_type = MutableNodeMTG; sheet_name = nothing) + read_mtg(file, mtg_type = MutableNodeMTG; sheet_name = nothing) Read an MTG file # Arguments - `file::String`: The path to the MTG file. -- `attr_type::DataType = Dict`: the type used to hold the attribute values for each node. - `mtg_type = MutableNodeMTG`: the type used to hold the mtg encoding for each node (*i.e.* link, symbol, index, scale). See details section below. - `sheet_name = nothing`: the sheet name in case you're reading an `xlsx` or `xlsm` file. It @@ -14,14 +13,8 @@ reads the first sheet if `nothing` (default behavior). # Details -`attr_type` should be: - -- `NamedTuple` if you don't plan to modify the attributes of the mtg, *e.g.* to use them for -plotting or computing statistics... -- `MutableNamedTuple` if you plan to modify the attributes values but not adding new attributes -very often, *e.g.* recompute an attribute value... -- `Dict` or similar (*e.g.* `OrderedDict`) if you plan to heavily modify the attributes, *e.g.* -adding/removing attributes a lot +Attributes are always stored as `ColumnarAttrs` (typed columnar backend). +Input values from the file are converted automatically. The `MultiScaleTreeGraph` package provides two types for `mtg_type`, one immutable ([`NodeMTG`](@ref)), and one mutable ([`MutableNodeMTG`](@ref)). If you're planning on modifying the mtg encoding of @@ -43,15 +36,12 @@ The MTG root node. file = joinpath(dirname(dirname(pathof(MultiScaleTreeGraph))),"test","files","simple_plant.mtg") mtg = read_mtg(file) -# Or using another `MutableNamedTuple` for the attributes to be able to add one if needed: -mtg = read_mtg(file,Dict); - # We can also read an mtg directly from an excel file from the field: file = joinpath(dirname(dirname(pathof(MultiScaleTreeGraph))),"test","files","tree3h.xlsx") mtg = read_mtg(file) ``` """ -function read_mtg(file, attr_type=Dict, mtg_type=MutableNodeMTG; sheet_name=nothing) +function read_mtg(file, mtg_type=MutableNodeMTG; sheet_name=nothing) file_extension = splitext(basename(file))[2] if file_extension == ".xlsx" || file_extension == ".xlsm" @@ -66,14 +56,14 @@ function read_mtg(file, attr_type=Dict, mtg_type=MutableNodeMTG; sheet_name=noth DelimitedFiles.writedlm(f, xlsx_data) seekstart(f) # test = String(take!(io)) - mtg, classes, description, features = parse_mtg_file(f, attr_type, mtg_type) + mtg, classes, description, features = parse_mtg_file(f, mtg_type) close(f) else # read the mtg file # f = open(file, "r") mtg, classes, description, features = open(file, "r") do f - parse_mtg_file(f, attr_type, mtg_type) + parse_mtg_file(f, mtg_type) end end @@ -83,7 +73,7 @@ function read_mtg(file, attr_type=Dict, mtg_type=MutableNodeMTG; sheet_name=noth return mtg end -function parse_mtg_file(f, attr_type, mtg_type) +function parse_mtg_file(f, mtg_type) line = [0] l = [""] l[1] = next_line!(f, line) @@ -132,7 +122,7 @@ function parse_mtg_file(f, attr_type, mtg_type) # Parse the mtg FEATURES section: if issection(l[1], "MTG") - global mtg = parse_mtg!(f, classes, features, line, l, attr_type, mtg_type) + global mtg = parse_mtg!(f, classes, features, line, l, mtg_type) continue end diff --git a/src/types/Attributes.jl b/src/types/Attributes.jl new file mode 100644 index 0000000..49a4ea7 --- /dev/null +++ b/src/types/Attributes.jl @@ -0,0 +1,631 @@ +""" +Marker type used by `read_mtg` to request the columnar attribute backend. +""" +struct ColumnarStore end + +mutable struct SubtreeIndexCache + dirty::Bool + built::Bool + strategy::Symbol + query_count::Int + mutation_count::Int + tin::Vector{Int} + tout::Vector{Int} + dfs_order::Vector{Int} +end + +SubtreeIndexCache() = SubtreeIndexCache(true, false, :auto, 0, 0, Int[], Int[], Int[]) + +mutable struct Column{T} + name::Symbol + data::Vector{T} + default::T +end + +mutable struct SymbolBucket + symbol::Symbol + row_to_node::Vector{Int} + node_to_row::Dict{Int,Int} + col_index::Dict{Symbol,Int} + columns::Vector{Any} # stores Column{T} + col_types::Vector{Any} +end + +function SymbolBucket(symbol::Symbol) + SymbolBucket(symbol, Int[], Dict{Int,Int}(), Dict{Symbol,Int}(), Any[], Any[]) +end + +mutable struct MTGAttributeStore + symbol_to_bucket::Dict{Symbol,Int} + buckets::Vector{SymbolBucket} + node_bucket::Vector{Int} + node_row::Vector{Int} + subtree_index::SubtreeIndexCache +end + +struct ColumnarQueryPlan + key::Symbol + col_idx_by_bucket::Vector{Int} +end + +function MTGAttributeStore() + MTGAttributeStore(Dict{Symbol,Int}(), SymbolBucket[], Int[], Int[], SubtreeIndexCache()) +end + +@inline function _validate_descendants_strategy(strategy::Symbol) + strategy in (:auto, :pointer, :indexed) && return strategy + error("Unknown descendants strategy $(strategy). Expected :auto, :pointer, or :indexed.") +end + +@inline descendants_strategy(store::MTGAttributeStore) = store.subtree_index.strategy + +function descendants_strategy!(store::MTGAttributeStore, strategy::Symbol) + store.subtree_index.strategy = _validate_descendants_strategy(strategy) + return store +end + +@inline function _mark_subtree_index_mutation!(store::MTGAttributeStore) + idx = store.subtree_index + idx.dirty = true + if idx.built + idx.mutation_count += 1 + end + return nothing +end + +@inline function _can_use_index_without_rebuild(store::MTGAttributeStore) + idx = store.subtree_index + idx.built && !idx.dirty +end + +@inline function _descendants_should_rebuild_auto(idx::SubtreeIndexCache) + # Keep pointer traversal for mutation-heavy periods; switch when queries dominate. + idx.query_count >= max(8, 4 * idx.mutation_count) +end + +function _rebuild_subtree_index!(store::MTGAttributeStore, root) + nmax = length(store.node_bucket) + tin = Vector{Int}(undef, nmax) + tout = Vector{Int}(undef, nmax) + @inbounds for i in 1:nmax + tin[i] = 0 + tout[i] = 0 + end + + nactive = 0 + @inbounds for i in 1:nmax + nactive += store.node_bucket[i] == 0 ? 0 : 1 + end + dfs_order = Int[] + sizehint!(dfs_order, nactive) + + stack_nodes = Vector{typeof(root)}(undef, 1) + stack_pos = Vector{Int}(undef, 1) + stack_nodes[1] = root + stack_pos[1] = 0 + t = 0 + + while !isempty(stack_nodes) + current = stack_nodes[end] + pos = stack_pos[end] + + if pos == 0 + nid = node_id(current) + t += 1 + tin[nid] = t + push!(dfs_order, nid) + stack_pos[end] = 1 + pos = 1 + end + + ch = children(current) + if pos <= length(ch) + stack_pos[end] = pos + 1 + push!(stack_nodes, ch[pos]) + push!(stack_pos, 0) + else + tout[node_id(current)] = t + pop!(stack_nodes) + pop!(stack_pos) + end + end + + idx = store.subtree_index + idx.tin = tin + idx.tout = tout + idx.dfs_order = dfs_order + idx.dirty = false + idx.built = true + idx.query_count = 0 + idx.mutation_count = 0 + return idx +end + +function _prepare_subtree_index!(store::MTGAttributeStore, root) + idx = store.subtree_index + strategy = idx.strategy + + if strategy === :pointer + return false + elseif strategy === :indexed + _can_use_index_without_rebuild(store) || _rebuild_subtree_index!(store, root) + return true + end + + # :auto + if _can_use_index_without_rebuild(store) + return true + end + + idx.query_count += 1 + if _descendants_should_rebuild_auto(idx) + _rebuild_subtree_index!(store, root) + return true + end + + return false +end + +mutable struct NodeAttrRef + store::Union{Nothing,MTGAttributeStore} + node_id::Int +end + +mutable struct ColumnarAttrs <: AbstractDict{Symbol,Any} + ref::NodeAttrRef + staged::Dict{Symbol,Any} +end + +ColumnarAttrs() = ColumnarAttrs(NodeAttrRef(nothing, 0), Dict{Symbol,Any}()) +function ColumnarAttrs(d::AbstractDict) + staged = Dict{Symbol,Any}() + for (k, v) in d + staged[Symbol(k)] = v + end + ColumnarAttrs(NodeAttrRef(nothing, 0), staged) +end + +@inline _isbound(attrs::ColumnarAttrs) = attrs.ref.store !== nothing && attrs.ref.node_id > 0 + +@inline function _ensure_node_capacity!(store::MTGAttributeStore, node_id::Int) + if node_id > length(store.node_bucket) + old = length(store.node_bucket) + resize!(store.node_bucket, node_id) + resize!(store.node_row, node_id) + @inbounds for i in old+1:node_id + store.node_bucket[i] = 0 + store.node_row[i] = 0 + end + end + return nothing +end + +@inline function _normalize_attr_key(key) + key isa Symbol ? key : Symbol(key) +end + +function _get_or_create_bucket!(store::MTGAttributeStore, symbol::Symbol) + bid = get(store.symbol_to_bucket, symbol, 0) + if bid == 0 + push!(store.buckets, SymbolBucket(symbol)) + bid = length(store.buckets) + store.symbol_to_bucket[symbol] = bid + end + return bid +end + +@inline function _column(bucket::SymbolBucket, col_idx::Int) + bucket.columns[col_idx] +end + +function _widen_column!(bucket::SymbolBucket, col_idx::Int, ::Type{NewT}) where {NewT} + old_col = bucket.columns[col_idx] + old_data = old_col.data + new_data = Vector{NewT}(undef, length(old_data)) + @inbounds for i in eachindex(old_data) + new_data[i] = old_data[i] + end + new_default = convert(NewT, old_col.default) + bucket.columns[col_idx] = Column{NewT}(old_col.name, new_data, new_default) + bucket.col_types[col_idx] = NewT + return bucket.columns[col_idx] +end + +function _ensure_column_type!(bucket::SymbolBucket, col_idx::Int, value) + T = bucket.col_types[col_idx] + value_T = typeof(value) + if value_T <: T + return _column(bucket, col_idx) + end + NewT = Union{T,value_T} + return _widen_column!(bucket, col_idx, NewT) +end + +function _add_column_internal!(bucket::SymbolBucket, key::Symbol, ::Type{T}, default::T) where {T} + haskey(bucket.col_index, key) && error("Column $(key) already exists for symbol $(bucket.symbol).") + nrows = length(bucket.row_to_node) + col = Column{T}(key, fill(default, nrows), default) + push!(bucket.columns, col) + push!(bucket.col_types, T) + bucket.col_index[key] = length(bucket.columns) + return nothing +end + +function _add_nullable_column_internal!(bucket::SymbolBucket, key::Symbol, ::Type{T}) where {T} + TT = Union{Nothing,T} + _add_column_internal!(bucket, key, TT, nothing) +end + +function _set_value!(bucket::SymbolBucket, row::Int, key::Symbol, value) + col_idx = get(bucket.col_index, key, 0) + if col_idx == 0 + if value === nothing + _add_nullable_column_internal!(bucket, key, Any) + else + _add_nullable_column_internal!(bucket, key, typeof(value)) + end + col_idx = bucket.col_index[key] + end + + col = _ensure_column_type!(bucket, col_idx, value) + col.data[row] = value + return value +end + +function _get_value(bucket::SymbolBucket, row::Int, key::Symbol, default=nothing) + col_idx = get(bucket.col_index, key, 0) + col_idx == 0 && return default + _column(bucket, col_idx).data[row] +end + +function _bucket_row(ref::NodeAttrRef) + store = ref.store + store === nothing && error("Columnar attributes are not bound to a store.") + node_id = ref.node_id + node_id <= 0 && error("Invalid node id for columnar attributes: $(node_id)") + node_id > length(store.node_bucket) && error("Node id $(node_id) is outside of store bounds.") + bid = store.node_bucket[node_id] + row = store.node_row[node_id] + bid == 0 && error("Node id $(node_id) is no longer attached to the attribute store.") + return store, bid, row +end + +function _add_node_with_attrs!(store::MTGAttributeStore, node_id::Int, symbol::Symbol, attrs::AbstractDict) + _mark_subtree_index_mutation!(store) + _ensure_node_capacity!(store, node_id) + bid = _get_or_create_bucket!(store, symbol) + bucket = store.buckets[bid] + + row = length(bucket.row_to_node) + 1 + push!(bucket.row_to_node, node_id) + bucket.node_to_row[node_id] = row + + @inbounds for i in eachindex(bucket.columns) + col = bucket.columns[i] + push!(col.data, col.default) + end + + store.node_bucket[node_id] = bid + store.node_row[node_id] = row + + for (k, v) in attrs + _set_value!(bucket, row, _normalize_attr_key(k), v) + end + + return nothing +end + +function _remove_node!(store::MTGAttributeStore, node_id::Int) + _mark_subtree_index_mutation!(store) + node_id > length(store.node_bucket) && return nothing + bid = store.node_bucket[node_id] + bid == 0 && return nothing + + bucket = store.buckets[bid] + row = bucket.node_to_row[node_id] + last_row = length(bucket.row_to_node) + moved_node = 0 + + if row != last_row + moved_node = bucket.row_to_node[last_row] + bucket.row_to_node[row] = moved_node + bucket.node_to_row[moved_node] = row + end + pop!(bucket.row_to_node) + delete!(bucket.node_to_row, node_id) + + @inbounds for i in eachindex(bucket.columns) + col = bucket.columns[i] + if row != last_row + col.data[row] = col.data[last_row] + end + pop!(col.data) + end + + if moved_node != 0 + store.node_row[moved_node] = row + end + store.node_bucket[node_id] = 0 + store.node_row[node_id] = 0 + return nothing +end + +function _drop_column_internal!(bucket::SymbolBucket, key::Symbol) + col_idx = get(bucket.col_index, key, 0) + col_idx == 0 && return false + delete!(bucket.col_index, key) + deleteat!(bucket.columns, col_idx) + deleteat!(bucket.col_types, col_idx) + for i in col_idx:length(bucket.columns) + bucket.col_index[_column(bucket, i).name] = i + end + return true +end + +function _rename_column_internal!(bucket::SymbolBucket, from::Symbol, to::Symbol) + from_idx = get(bucket.col_index, from, 0) + from_idx == 0 && return false + haskey(bucket.col_index, to) && error("Column $(to) already exists for symbol $(bucket.symbol).") + delete!(bucket.col_index, from) + bucket.col_index[to] = from_idx + _column(bucket, from_idx).name = to + return true +end + +function _store_for_node_attrs(attrs::ColumnarAttrs) + _isbound(attrs) || return nothing + attrs.ref.store +end + +@inline function _columnar_store(node) + attrs = node_attributes(node) + attrs isa ColumnarAttrs || return nothing + _store_for_node_attrs(attrs) +end + +function build_columnar_query_plan(node, key::Symbol) + store = _columnar_store(node) + store === nothing && return nothing + col_idx_by_bucket = Vector{Int}(undef, length(store.buckets)) + @inbounds for i in eachindex(store.buckets) + col_idx_by_bucket[i] = get(store.buckets[i].col_index, key, 0) + end + ColumnarQueryPlan(key, col_idx_by_bucket) +end + +@inline function _symbol_bucket_ids(store::MTGAttributeStore, symbol_filter) + if symbol_filter === nothing + return collect(eachindex(store.buckets)) + elseif symbol_filter isa Symbol + bid = get(store.symbol_to_bucket, symbol_filter, 0) + return bid == 0 ? Int[] : Int[bid] + elseif symbol_filter isa Union{Tuple,AbstractArray} + out = Int[] + for sym in symbol_filter + bid = get(store.symbol_to_bucket, Symbol(sym), 0) + bid == 0 || push!(out, bid) + end + return out + else + bid = get(store.symbol_to_bucket, Symbol(symbol_filter), 0) + return bid == 0 ? Int[] : Int[bid] + end +end + +@inline function _remove_nothing_type(T) + T === Nothing && return Union{} + parts = Base.uniontypes(T) + isempty(parts) && return T + kept = [p for p in parts if p !== Nothing] + if isempty(kept) + return Union{} + elseif length(kept) == 1 + return kept[1] + end + acc = Union{kept[1],kept[2]} + @inbounds for i in 3:length(kept) + acc = Union{acc,kept[i]} + end + return acc +end + +function infer_columnar_attr_type(node, key::Symbol, symbol_filter, ignore_nothing::Bool) + store = _columnar_store(node) + store === nothing && return Any + bucket_ids = _symbol_bucket_ids(store, symbol_filter) + isempty(bucket_ids) && return Any + + has_any = false + missing_in_some = false + T = Union{} + + @inbounds for bid in bucket_ids + bucket = store.buckets[bid] + col_idx = get(bucket.col_index, key, 0) + if col_idx == 0 + missing_in_some = true + continue + end + has_any = true + col_T = bucket.col_types[col_idx] + T = T === Union{} ? col_T : typejoin(T, col_T) + end + + has_any || return Any + + if ignore_nothing + Tnn = _remove_nothing_type(T) + return Tnn === Union{} ? Any : Tnn + end + + if missing_in_some + return Union{Nothing,T} + end + + return T +end + +function init_columnar_root!(attrs::ColumnarAttrs, node_id::Int, symbol::Symbol) + _isbound(attrs) && return attrs + store = MTGAttributeStore() + _add_node_with_attrs!(store, node_id, symbol, attrs.staged) + attrs.ref.store = store + attrs.ref.node_id = node_id + empty!(attrs.staged) + return attrs +end + +function bind_columnar_child!(parent_attrs::ColumnarAttrs, child_attrs::ColumnarAttrs, node_id::Int, symbol::Symbol) + if _isbound(child_attrs) + child_attrs.ref.node_id = node_id + return child_attrs + end + + store = _store_for_node_attrs(parent_attrs) + store === nothing && error("Parent node is not attached to a columnar attribute store.") + _add_node_with_attrs!(store, node_id, symbol, child_attrs.staged) + child_attrs.ref.store = store + child_attrs.ref.node_id = node_id + empty!(child_attrs.staged) + return child_attrs +end + +function remove_columnar_node!(attrs::ColumnarAttrs) + store = _store_for_node_attrs(attrs) + store === nothing && return nothing + _remove_node!(store, attrs.ref.node_id) + attrs.ref.node_id = 0 + return nothing +end + +function add_column!(store::MTGAttributeStore, symbol::Symbol, key::Symbol, ::Type{T}; default::T) where {T} + bid = _get_or_create_bucket!(store, symbol) + _add_column_internal!(store.buckets[bid], key, T, default) + return store +end + +function drop_column!(store::MTGAttributeStore, symbol::Symbol, key::Symbol) + bid = get(store.symbol_to_bucket, symbol, 0) + bid == 0 && return store + _drop_column_internal!(store.buckets[bid], key) + return store +end + +function rename_column!(store::MTGAttributeStore, symbol::Symbol, from::Symbol, to::Symbol) + bid = get(store.symbol_to_bucket, symbol, 0) + bid == 0 && return store + _rename_column_internal!(store.buckets[bid], from, to) + return store +end + +function Base.length(attrs::ColumnarAttrs) + if !_isbound(attrs) + return length(attrs.staged) + end + _, bid, _ = _bucket_row(attrs.ref) + return length(attrs.ref.store.buckets[bid].columns) +end + +function Base.keys(attrs::ColumnarAttrs) + if !_isbound(attrs) + return collect(keys(attrs.staged)) + end + _, bid, _ = _bucket_row(attrs.ref) + bucket = attrs.ref.store.buckets[bid] + out = Vector{Symbol}(undef, length(bucket.columns)) + @inbounds for i in eachindex(bucket.columns) + out[i] = _column(bucket, i).name + end + out +end + +function Base.haskey(attrs::ColumnarAttrs, key::Symbol) + if !_isbound(attrs) + return haskey(attrs.staged, key) + end + _, bid, _ = _bucket_row(attrs.ref) + haskey(attrs.ref.store.buckets[bid].col_index, key) +end +Base.haskey(attrs::ColumnarAttrs, key) = haskey(attrs, _normalize_attr_key(key)) + +function Base.getindex(attrs::ColumnarAttrs, key::Symbol) + if !_isbound(attrs) + return attrs.staged[key] + end + store, bid, row = _bucket_row(attrs.ref) + bucket = store.buckets[bid] + col_idx = get(bucket.col_index, key, 0) + col_idx == 0 && throw(KeyError(key)) + return _column(bucket, col_idx).data[row] +end +Base.getindex(attrs::ColumnarAttrs, key) = getindex(attrs, _normalize_attr_key(key)) + +function Base.get(attrs::ColumnarAttrs, key::Symbol, default) + if !_isbound(attrs) + return get(attrs.staged, key, default) + end + store, bid, row = _bucket_row(attrs.ref) + return _get_value(store.buckets[bid], row, key, default) +end +Base.get(attrs::ColumnarAttrs, key, default) = get(attrs, _normalize_attr_key(key), default) + +function Base.setindex!(attrs::ColumnarAttrs, value, key::Symbol) + if !_isbound(attrs) + attrs.staged[key] = value + return value + end + store, bid, row = _bucket_row(attrs.ref) + _set_value!(store.buckets[bid], row, key, value) + return value +end +Base.setindex!(attrs::ColumnarAttrs, value, key) = setindex!(attrs, value, _normalize_attr_key(key)) + +function Base.iterate(attrs::ColumnarAttrs, state=nothing) + if !_isbound(attrs) + return state === nothing ? iterate(attrs.staged) : iterate(attrs.staged, state) + end + k = keys(attrs) + i = state === nothing ? 1 : state + i > length(k) && return nothing + key = k[i] + return (key => get(attrs, key, nothing), i + 1) +end + +function Base.pop!(attrs::ColumnarAttrs, key, default=nothing) + key_ = _normalize_attr_key(key) + if !_isbound(attrs) + return pop!(attrs.staged, key_, default) + end + + store, bid, row = _bucket_row(attrs.ref) + bucket = store.buckets[bid] + col_idx = get(bucket.col_index, key_, 0) + col_idx == 0 && return default + old = _column(bucket, col_idx).data[row] + _drop_column_internal!(bucket, key_) + return old +end + +function Base.delete!(attrs::ColumnarAttrs, key) + pop!(attrs, key, nothing) + return attrs +end + +function Base.empty!(attrs::ColumnarAttrs) + if !_isbound(attrs) + empty!(attrs.staged) + return attrs + end + store, bid, _ = _bucket_row(attrs.ref) + bucket = store.buckets[bid] + empty!(bucket.col_index) + empty!(bucket.columns) + empty!(bucket.col_types) + return attrs +end + +Base.copy(attrs::ColumnarAttrs) = Dict{Symbol,Any}(pairs(attrs)) + +function Base.show(io::IO, attrs::ColumnarAttrs) + print(io, "ColumnarAttrs(", Dict{Symbol,Any}(pairs(attrs)), ")") +end diff --git a/src/types/Node.jl b/src/types/Node.jl index 6743e3b..975dcaf 100644 --- a/src/types/Node.jl +++ b/src/types/Node.jl @@ -66,35 +66,51 @@ end @deprecate Node(name::String, id::Int, parent::Node, MTG::M, attributes::T) where {M<:AbstractNodeMTG,T<:NamedTuple} Node(id, parent, MTG, attributes) @deprecate Node(name::String, id::Int, parent::Node, MTG::M, attributes::T) where {M<:AbstractNodeMTG,T<:MutableNamedTuple} Node(id, parent, MTG, attributes) -# For the root: -function Node(id::Int, MTG::T, attributes::A) where {T<:AbstractNodeMTG,A} - Node(id, nothing, Vector{Node{T,A}}(), MTG, attributes, Dict{String,Vector{Node{T,A}}}()) +function Node(id::Int, MTG::T, attributes::ColumnarAttrs) where {T<:AbstractNodeMTG} + node = Node{T,ColumnarAttrs}( + id, nothing, Vector{Node{T,ColumnarAttrs}}(), MTG, attributes, Dict{String,Vector{Node{T,ColumnarAttrs}}}() + ) + init_columnar_root!(attributes, id, getfield(MTG, :symbol)) + return node end # If the id is not given, it is the root node, so we use 1 -Node(MTG::T, attributes) where {T<:AbstractNodeMTG} = Node(1, MTG, attributes) -# Not attributes given, by default we use Dict: -Node(id::Int, MTG::T) where {T<:AbstractNodeMTG} = Node(id, MTG, Dict{Symbol,Any}()) +Node(MTG::T, attributes) where {T<:AbstractNodeMTG} = Node(1, MTG, _to_columnar_attrs(attributes)) +Node(id::Int, MTG::T, attributes) where {T<:AbstractNodeMTG} = Node(id, MTG, _to_columnar_attrs(attributes)) +Node(id::Int, MTG::T) where {T<:AbstractNodeMTG} = Node(id, MTG, ColumnarAttrs()) -# Special case for the NamedTuple and MutableNamedTuple, else it overspecializes and we -# can't mutate attributes, i.e. we get somthing like -# Node{NodeMTG,MutableNamedTuple{(:a,), Tuple{Base.RefValue{Int64}}}} instead of just: -# Node{NodeMTG,MutableNamedTuple} -function Node(id::Int, MTG::M, attributes::T) where {M<:AbstractNodeMTG,T<:MutableNamedTuple} - Node{M,MutableNamedTuple}(id, nothing, Vector{Node{M,MutableNamedTuple}}(), MTG, attributes, Dict{String,Vector{Node{M,MutableNamedTuple}}}()) +function _to_columnar_attrs(attributes::ColumnarAttrs) + attributes end -function Node(id::Int, MTG::M, attributes::T) where {M<:AbstractNodeMTG,T<:NamedTuple} - Node{M,NamedTuple}(id, nothing, Vector{Node{M,NamedTuple}}(), MTG, attributes, Dict{String,Vector{Node{M,MutableNamedTuple}}}()) +function _to_columnar_attrs(attributes::AbstractDict) + ColumnarAttrs(attributes) end -# Add a node as a child of another node: -function Node(id::Int, parent::Node{M,A}, MTG::M, attributes::A) where {M<:AbstractNodeMTG,A} - node = Node(id, parent, Vector{Node{M,A}}(), MTG, attributes, Dict{String,Vector{Node{M,A}}}()) +function _to_columnar_attrs(attributes::NamedTuple) + ColumnarAttrs(Dict{Symbol,Any}(pairs(attributes))) +end + +function _to_columnar_attrs(attributes::MutableNamedTuple) + ColumnarAttrs(Dict{Symbol,Any}(pairs(attributes))) +end + +function _to_columnar_attrs(attributes) + throw(ArgumentError("Unsupported attribute container type $(typeof(attributes)); use ColumnarAttrs, AbstractDict, or NamedTuple-like values.")) +end + +function Node(id::Int, parent::Node{M,ColumnarAttrs}, MTG::M, attributes::ColumnarAttrs) where {M<:AbstractNodeMTG} + node = Node{M,ColumnarAttrs}( + id, parent, Vector{Node{M,ColumnarAttrs}}(), MTG, attributes, Dict{String,Vector{Node{M,ColumnarAttrs}}}() + ) addchild!(parent, node) + bind_columnar_child!(node_attributes(parent), attributes, id, getfield(MTG, :symbol)) return node end +Node(id::Int, parent::Node{M,ColumnarAttrs}, MTG::M, attributes) where {M<:AbstractNodeMTG} = + Node(id, parent, MTG, _to_columnar_attrs(attributes)) + function Node(id::Int, parent::Node{M,A}, MTG::T, attributes::A) where {M<:AbstractNodeMTG,A,T<:AbstractNodeMTG} error( "The parent node has an MTG encoding of type `$(M)`, but the MTG encoding you provide is of type `$(T)`,", @@ -102,27 +118,16 @@ function Node(id::Int, parent::Node{M,A}, MTG::T, attributes::A) where {M<:Abstr ) end -# Special case for NamedTuple here: -function Node(id::Int, parent::Node, MTG::M, attributes::T) where {M<:AbstractNodeMTG,T<:NamedTuple} - node = Node{M,NamedTuple}(id, parent, Vector{Node{M,NamedTuple}}(), MTG, attributes, Dict{String,Vector{Node{M,NamedTuple}}}()) - addchild!(parent, node) - return node -end - -# Idem for MutableNamedTuple here: -function Node(id::Int, parent::Node, MTG::M, attributes::T) where {M<:AbstractNodeMTG,T<:MutableNamedTuple} - node = Node{M,MutableNamedTuple}(id, parent, Vector{Node{M,MutableNamedTuple}}(), MTG, attributes, Dict{String,Vector{Node{M,MutableNamedTuple}}}()) - addchild!(parent, node) - return node -end +Node(id::Int, parent::Node{M,ColumnarAttrs}, MTG::M) where {M<:AbstractNodeMTG} = + Node(id, parent, MTG, ColumnarAttrs()) # If the id is not given, it is the root node, so we use 1 function Node(parent::Node, MTG::T, attributes) where {T<:AbstractNodeMTG} Node(new_id(get_root(parent)), parent, MTG, attributes) end -# Only the MTG is given, by default we use Dict as attributes: -Node(MTG::T) where {T<:AbstractNodeMTG} = Node(1, MTG, Dict{Symbol,Any}()) +# Only the MTG is given, by default we use ColumnarAttrs as attributes: +Node(MTG::T) where {T<:AbstractNodeMTG} = Node(1, MTG, ColumnarAttrs()) # Only the ID, MTG and parent are given, by default we use the parent attribute type: function Node(id::Int, parent::Node{N,A}, MTG::T) where {N<:AbstractNodeMTG,A,T<:AbstractNodeMTG} @@ -175,14 +180,23 @@ AbstractTrees.childtype(::Type{Node{T,A}}) where {T,A} = Node{T,A} Set the parent of the node. """ -reparent!(node::N, p::N2) where {N<:Node{T,A},N2<:Union{Nothing,Node{T,A}}} where {T,A} = setfield!(node, :parent, p) +function reparent!(node::N, p::N2) where {N<:Node{T,A},N2<:Union{Nothing,Node{T,A}}} where {T,A} + setfield!(node, :parent, p) + _mark_structure_mutation!(node) + p === nothing || _mark_structure_mutation!(p) + return p +end """ rechildren!(node::Node{T,A}, chnodes::Vector{Node{T,A}}) where {T,A} Set the children of the node. """ -rechildren!(node::Node{T,A}, chnodes::Vector{Node{T,A}}) where {T,A} = setfield!(node, :children, chnodes) +function rechildren!(node::Node{T,A}, chnodes::Vector{Node{T,A}}) where {T,A} + setfield!(node, :children, chnodes) + _mark_structure_mutation!(node) + return chnodes +end # AbstractTrees.childstatetype(::Type{Node{T,A}}) where {T,A} = Node{T,A} # Set the traits for Node: @@ -347,6 +361,130 @@ and should not be used directly. Use *e.g.* `node.key = value` to set a single a """ node_attributes!(node::Node{T,A}, attributes::A) where {T,A} = setfield!(node, :attributes, attributes) +""" + attribute(node::Node, key::Symbol; default=nothing) + +Get one attribute from a node. +""" +attribute(node::Node, key::Symbol; default=nothing) = get(node_attributes(node), key, default) +attribute(node::Node, key; default=nothing) = attribute(node, Symbol(key), default=default) + +""" + attribute!(node::Node, key::Symbol, value) + +Set one attribute on a node. +""" +function attribute!(node::Node, key::Symbol, value) + node_attributes(node)[key] = value + return value +end +attribute!(node::Node, key, value) = attribute!(node, Symbol(key), value) + +""" + attributes(node::Node; format=:namedtuple) + +Get all attributes from a node as a snapshot. +""" +function attributes(node::Node; format=:namedtuple) + attrs = node_attributes(node) + if format == :dict + return Dict{Symbol,Any}(pairs(attrs)) + elseif format == :namedtuple + k = collect(keys(attrs)) + vals = map(key -> get(attrs, key, nothing), k) + return NamedTuple{Tuple(k)}(Tuple(vals)) + else + error("Unknown format $(format). Expected :namedtuple or :dict.") + end +end + +""" + attribute_names(node::Node) + +Return the attribute names available for this node. +""" +attribute_names(node::Node) = collect(keys(node_attributes(node))) + +function _node_store(node::Node) + attrs = node_attributes(node) + attrs isa ColumnarAttrs || error("This operation requires a columnar attribute backend.") + store = _store_for_node_attrs(attrs) + store === nothing && error("Node is not bound to a columnar attribute store.") + return store +end + +@inline function _mark_structure_mutation!(node::Node) + attrs = node_attributes(node) + attrs isa ColumnarAttrs || return nothing + store = _store_for_node_attrs(attrs) + store === nothing && return nothing + _mark_subtree_index_mutation!(store) + return nothing +end + +function add_column!(node::Node, symbol::Symbol, key::Symbol, ::Type{T}; default::T) where {T} + add_column!(_node_store(node), symbol, key, T, default=default) +end + +function add_column!(node::Node, symbols::AbstractVector{Symbol}, key::Symbol, ::Type{T}; default::T) where {T} + store = _node_store(node) + for sym in symbols + add_column!(store, sym, key, T, default=default) + end + return node +end + +function drop_column!(node::Node, symbol::Symbol, key::Symbol) + drop_column!(_node_store(node), symbol, key) +end + +function drop_column!(node::Node, symbols::AbstractVector{Symbol}, key::Symbol) + store = _node_store(node) + for sym in symbols + drop_column!(store, sym, key) + end + return node +end + +function rename_column!(node::Node, symbol::Symbol, from::Symbol, to::Symbol) + rename_column!(_node_store(node), symbol, from, to) +end + +function rename_column!(node::Node, symbols::AbstractVector{Symbol}, from::Symbol, to::Symbol) + store = _node_store(node) + for sym in symbols + rename_column!(store, sym, from, to) + end + return node +end + +""" + descendants_strategy(node::Node) + descendants_strategy!(node::Node, strategy::Symbol) + +Get or set how `descendants(node, key, ...)` is computed for columnar MTGs. + +- `:auto` (default): choose automatically based on workload. +- `:pointer`: always follow parent/children links directly in the graph. +- `:indexed`: use a precomputed index for descendant lookups. + +The index is based on a Depth-First Search (DFS) visit order (visit a branch deeply, then the +next branch). It can speed up repeated descendant requests on mostly stable trees, while +`:pointer` is often better when the tree structure changes very frequently. +""" +function descendants_strategy(node::Node) + attrs = node_attributes(node) + attrs isa ColumnarAttrs || return :pointer + store = _store_for_node_attrs(attrs) + store === nothing && return :pointer + return descendants_strategy(store) +end + +function descendants_strategy!(node::Node, strategy::Symbol) + descendants_strategy!(_node_store(node), strategy) + return node +end + """ get_attributes(mtg) diff --git a/test/Project.toml b/test/Project.toml index 67e71b2..f9b076c 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -4,4 +4,5 @@ DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6" MutableNamedTuples = "af6c499f-54b4-48cc-bbd2-094bba7533c7" +Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/test/runtests.jl b/test/runtests.jl index 64b6014..ee1b045 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,6 @@ using Test using MultiScaleTreeGraph -using DataFrames, Dates, MutableNamedTuples +using DataFrames, Dates using Graphs, AbstractTrees @testset "read_mtg" begin @@ -15,6 +15,10 @@ end include("test-nodes.jl") end +@testset "columnar backend" begin + include("test-columnar.jl") +end + @testset "descendants()" begin include("test-descendants.jl") end diff --git a/test/test-ancestors.jl b/test/test-ancestors.jl index b48a194..d08ee6e 100644 --- a/test/test-ancestors.jl +++ b/test/test-ancestors.jl @@ -5,23 +5,24 @@ # Using a leaf node from the mtg: leaf_node = get_node(mtg, 5) - @test ancestors(leaf_node, :Width; type=Union{Nothing,Float64}) == reverse(width_all[1:4]) + @test_logs (:warn, r"Keyword argument `type` in `ancestors` is deprecated") ancestors(leaf_node, :Width; type=Union{Nothing,Float64}) @test ancestors(leaf_node, :Width) == reverse(width_all[1:4]) d = ancestors(leaf_node, :Width, scale=3) - @test typeof(d) == Vector{Any} + @test typeof(d) == Vector{Union{Nothing,Float64}} @test length(d) == 1 @test d[1] == width_all[4] - d_typed = ancestors(leaf_node, :Width, type=Union{Nothing,Float64}) - @test typeof(d_typed) == Vector{Union{Nothing,Float64}} - @test ancestors(leaf_node, :Width, symbol=("Leaf", "Internode")) == width_all[[4]] + @test typeof(ancestors(leaf_node, :Width)) == Vector{Union{Nothing,Float64}} + d_symbol = ancestors(leaf_node, :Width, symbol=("Leaf", "Internode")) + @test d_symbol == width_all[[4]] + @test typeof(d_symbol) == Vector{Union{Nothing,Float64}} + @test typeof(ancestors(leaf_node, :Width, ignore_nothing=true)) == Vector{Float64} @test ancestors(leaf_node, :Width, symbol=("Leaf", "Internode"), self=true) == width_all[end:-1:end-1] buf_vals = Union{Nothing,Float64}[] - @test ancestors!(buf_vals, leaf_node, :Width; type=Union{Nothing,Float64}) == - reverse(width_all[1:4]) + @test ancestors!(buf_vals, leaf_node, :Width) == reverse(width_all[1:4]) @test ancestors!(buf_vals, leaf_node, :Width, symbol=("Leaf", "Internode"), self=true) == width_all[end:-1:end-1] diff --git a/test/test-caching.jl b/test/test-caching.jl index 786f26d..0f65cda 100644 --- a/test/test-caching.jl +++ b/test/test-caching.jl @@ -1,7 +1,7 @@ file = joinpath(dirname(dirname(pathof(MultiScaleTreeGraph))), "test", "files", "simple_plant.mtg") @testset "caching" begin - mtg = read_mtg(file, Dict) + mtg = read_mtg(file) # Cache all leaf nodes: cache_nodes!(mtg, symbol=:Leaf) @@ -63,4 +63,4 @@ file = joinpath(dirname(dirname(pathof(MultiScaleTreeGraph))), "test", "files", # Check that the nodes of the MTG where not modified: @test [node[:x] for node in AbstractTrees.PreOrderDFS(mtg)] == [1, 2, 3, 4, 5, 6, 7] -end \ No newline at end of file +end diff --git a/test/test-columnar.jl b/test/test-columnar.jl new file mode 100644 index 0000000..7add527 --- /dev/null +++ b/test/test-columnar.jl @@ -0,0 +1,69 @@ +using Tables + +file = joinpath(dirname(@__FILE__), "files", "simple_plant.mtg") +mtg = read_mtg(file) + +@test node_attributes(mtg) isa MultiScaleTreeGraph.ColumnarAttrs + +leaf = traverse(mtg, node -> node, symbol=:Leaf, type=typeof(mtg))[1] + +leaf_width = attribute(leaf, :Width, default=nothing) +@test leaf_width !== nothing +@test attribute(mtg, :Width, default=nothing) === nothing + +attribute!(leaf, :new_attr, 42.0) +@test attribute(leaf, :new_attr) == 42.0 +@test :new_attr in attribute_names(leaf) + +attrs_named = attributes(leaf, format=:namedtuple) +attrs_dict = attributes(leaf, format=:dict) +@test attrs_named.Width == leaf_width +@test attrs_dict[:Width] == leaf_width + +add_column!(mtg, :Leaf, :temperature, Float64, default=20.0) +@test attribute(leaf, :temperature) == 20.0 +drop_column!(mtg, :Leaf, :temperature) +@test attribute(leaf, :temperature, default=nothing) === nothing + +add_column!(mtg, :Leaf, :tmpcol, Float64, default=1.0) +rename_column!(mtg, :Leaf, :tmpcol, :tmpcol2) +@test attribute(leaf, :tmpcol2) == 1.0 + +leaf_table = symbol_table(mtg, :Leaf) +leaf_df = DataFrame(leaf_table) +@test :node_id in Symbol.(names(leaf_df)) +@test :Width in Symbol.(names(leaf_df)) +@test nrow(leaf_df) > 0 + +all_table = mtg_table(mtg) +all_df = DataFrame(all_table) +@test :node_id in Symbol.(names(all_df)) +@test :symbol in Symbol.(names(all_df)) +@test all_df.node_id == list_nodes(mtg) +@test any(ismissing, all_df.Width) + +# Hybrid descendants traversal strategy. +@test descendants_strategy(mtg) == :auto +descendants_strategy!(mtg, :indexed) +@test descendants_strategy(mtg) == :indexed + +store = MultiScaleTreeGraph._node_store(mtg) +@test store.subtree_index.dirty +leaf_widths_before = descendants(mtg, :Width, symbol=:Leaf, ignore_nothing=true) +@test !store.subtree_index.dirty +@test store.subtree_index.built + +insert_child!( + mtg[1], + MutableNodeMTG(:+, :Leaf, 0, 3), + _ -> Dict{Symbol,Any}(:Width => 9.99, :Area => 0.01, :mass => 0.1), +) +@test store.subtree_index.dirty +leaf_widths_after = descendants(mtg, :Width, symbol=:Leaf, ignore_nothing=true) +@test !store.subtree_index.dirty +@test length(leaf_widths_after) == length(leaf_widths_before) + 1 +@test leaf_widths_after[end] == 9.99 + +descendants_strategy!(mtg, :pointer) +@test descendants_strategy(mtg) == :pointer +@test descendants(mtg, :Width, symbol=:Leaf, ignore_nothing=true) == leaf_widths_after diff --git a/test/test-conversion.jl b/test/test-conversion.jl index 0c9a763..febd1a7 100644 --- a/test/test-conversion.jl +++ b/test/test-conversion.jl @@ -1,4 +1,4 @@ -mtg = read_mtg("files/simple_plant.mtg", Dict); +mtg = read_mtg("files/simple_plant.mtg"); @testset "DataFrame" begin df_mtg = DataFrame(mtg, [:scales, :Length]) @@ -10,4 +10,4 @@ end meta_mtg = MetaGraph(mtg) @test meta_mtg[1][:scales] == [0, 1, 2, 3, 3] @test meta_mtg[7][:Length] == 0.2 -end \ No newline at end of file +end diff --git a/test/test-descendants.jl b/test/test-descendants.jl index ba89049..0b3a161 100644 --- a/test/test-descendants.jl +++ b/test/test-descendants.jl @@ -1,16 +1,18 @@ @testset "descendants" begin mtg = read_mtg("files/simple_plant.mtg") width_all = [nothing, nothing, 0.02, 0.1, 0.02, 0.1] - @test descendants(mtg, :Width; type=Union{Nothing,Float64}) == width_all + @test_logs (:warn, r"Keyword argument `type` in `descendants` is deprecated") descendants(mtg, :Width; type=Union{Nothing,Float64}) @test descendants(mtg, :Width) == width_all d = descendants(mtg, :Width, scale=1) - @test typeof(d) == Vector{Any} + @test typeof(d) == Vector{Union{Nothing,Float64}} @test length(d) == 1 @test d[1] === width_all[1] - d_typed = descendants(mtg, :Width, type=Union{Nothing,Float64}) - @test typeof(d_typed) == Vector{Union{Nothing,Float64}} - @test descendants(mtg, :Width, symbol=(:Leaf, :Internode)) == width_all[3:end] + @test typeof(descendants(mtg, :Width)) == Vector{Union{Nothing,Float64}} + d_symbol = descendants(mtg, :Width, symbol=(:Leaf, :Internode)) + @test d_symbol == width_all[3:end] + @test typeof(d_symbol) == Vector{Union{Nothing,Float64}} + @test typeof(descendants(mtg, :Width, ignore_nothing=true)) == Vector{Float64} mtg2 = mtg[1][1][1][2] @test descendants(mtg2, :Width, symbol=:Leaf)[1] == width_all[end] @@ -46,5 +48,5 @@ end # using BenchmarkTools # @benchmark descendants($mtg, :Width) # 876 ns # @benchmark descendants!($mtg, :Width) # 5.6 μs -# @benchmark descendants($mtg, :Width; type=Union{Nothing,Float64}) # 7.6 μs +# @benchmark descendants($mtg, :Width) # inferred eltype # @benchmark descendants(mtg, :Width) diff --git a/test/test-insert_node.jl b/test/test-insert_node.jl index d6c496e..485b8bb 100644 --- a/test/test-insert_node.jl +++ b/test/test-insert_node.jl @@ -78,7 +78,8 @@ end @test length(mtg) == length_before + 1 @test node_mtg(mtg[1][1]) == template @test node_id(mtg[1][1]) == 8 - @test node_attributes(mtg[1][1]) == Dict{Symbol,Any}(:Total_Length => 0.6) + @test node_attributes(mtg[1][1]) isa MultiScaleTreeGraph.ColumnarAttrs + @test mtg[1][1][:Total_Length] == 0.6 end diff --git a/test/test-mutation.jl b/test/test-mutation.jl index b8e6e38..108d454 100644 --- a/test/test-mutation.jl +++ b/test/test-mutation.jl @@ -1,5 +1,5 @@ @testset "mutate_node!" begin - mtg = read_mtg("files/simple_plant.mtg", Dict) + mtg = read_mtg("files/simple_plant.mtg") # Using a leaf node from the mtg: leaf_node = get_node(mtg, 5) # Add a new attributes, x based on node field, y based on x and z using a function: @@ -21,7 +21,7 @@ end @testset "mutate_mtg!" begin - mtg = read_mtg("files/simple_plant.mtg", Dict) + mtg = read_mtg("files/simple_plant.mtg") # Using a leaf node from the mtg: leaf_node = get_node(mtg, 5) @@ -39,43 +39,15 @@ end end @testset "append!" begin - # On Dict attributes - mtg = read_mtg("files/simple_plant.mtg", Dict) + mtg = read_mtg("files/simple_plant.mtg") append!(mtg, (test=3,)) @test haskey(mtg, :test) @test mtg[:test] == 3 - - # On MutableNamedTuple attributes - mtg = read_mtg("files/simple_plant.mtg", MutableNamedTuple) - # Using a leaf node from the mtg: - append!(mtg, (test=3,)) - @test haskey(mtg, :test) - @test mtg[:test] == 3 - - # On NamedTuple attributes - mtg = read_mtg("files/simple_plant.mtg", NamedTuple) - # Using a leaf node from the mtg: - append!(mtg, (test=3,)) - @test haskey(mtg, :test) - @test mtg[:test] == 3 end @testset "pop!" begin - # On Dict attributes - mtg = read_mtg("files/simple_plant.mtg", Dict) - pop!(mtg, :symbols) - @test !haskey(mtg, :symbols) - - # On MutableNamedTuple attributes - mtg = read_mtg("files/simple_plant.mtg", MutableNamedTuple) - # Using a leaf node from the mtg: - pop!(mtg, :symbols) - @test !haskey(mtg, :symbols) - - # On NamedTuple attributes - mtg = read_mtg("files/simple_plant.mtg", NamedTuple) - # Using a leaf node from the mtg: + mtg = read_mtg("files/simple_plant.mtg") pop!(mtg, :symbols) @test !haskey(mtg, :symbols) end diff --git a/test/test-nodes.jl b/test/test-nodes.jl index 1949917..1afb4e1 100644 --- a/test/test-nodes.jl +++ b/test/test-nodes.jl @@ -63,7 +63,8 @@ end mtg_code = MultiScaleTreeGraph.NodeMTG(:/, :Plant, 1, 1) mtg = MultiScaleTreeGraph.Node(mtg_code) @test get_attributes(mtg) == [] - @test node_attributes(mtg) == Dict{Symbol,Any}() + @test node_attributes(mtg) isa MultiScaleTreeGraph.ColumnarAttrs + @test isempty(node_attributes(mtg)) @test node_id(mtg) == 1 @test parent(mtg) === nothing @test node_mtg(mtg) == mtg_code @@ -79,7 +80,8 @@ end @test parent(internode) == mtg @test node_id(internode) == 2 @test node_mtg(internode) == MultiScaleTreeGraph.NodeMTG(:/, :Internode, 1, 2) - @test node_attributes(internode) == Dict{Symbol,Any}() + @test node_attributes(internode) isa MultiScaleTreeGraph.ColumnarAttrs + @test isempty(node_attributes(internode)) @test children(mtg) == [internode] end @@ -117,6 +119,6 @@ end @testset "Adding a child with a different MTG encoding type" begin - mtg = read_mtg(file, Dict, MutableNodeMTG) + mtg = read_mtg(file, MutableNodeMTG) VERSION >= v"1.7" && @test_throws "The parent node has an MTG encoding of type `MutableNodeMTG`, but the MTG encoding you provide is of type `NodeMTG`, please make sure they are the same." Node(mtg, NodeMTG(:/, :Branch, 1, 2)) end diff --git a/test/test-read_mtg.jl b/test/test-read_mtg.jl index a02ce9b..ed2f851 100644 --- a/test/test-read_mtg.jl +++ b/test/test-read_mtg.jl @@ -1,4 +1,4 @@ -mtg = read_mtg("files/simple_plant.mtg", MutableNamedTuple, NodeMTG); +mtg = read_mtg("files/simple_plant.mtg", NodeMTG) classes = get_classes(mtg) features = get_features(mtg) @@ -7,139 +7,56 @@ features = get_features(mtg) @test size(classes) == (5, 5) @test String.(classes.SYMBOL) == ["Scene", "Individual", "Axis", "Internode", "Leaf"] @test classes.SCALE == [0, 1, 2, 3, 3] - @test classes.DECOMPOSITION == ["FREE" for i = 1:5] - @test classes.INDEXATION == ["FREE" for i = 1:5] - @test classes.DEFINITION == ["IMPLICIT" for i = 1:5] -end - -@testset "test description" begin - @test typeof(mtg[:description]) == DataFrame - @test size(mtg[:description]) == (2, 4) - @test mtg[:description].LEFT == ["Internode", "Internode"] - @test mtg[:description].RELTYPE == ["+", "<"] - @test mtg[:description].MAX == ["?", "?"] end @testset "test features" begin @test typeof(features) == DataFrame @test size(features) == (5, 2) - @test features.NAME == [:Length, :Width, :XEuler, :dateDeath, :isAlive] - @test features.TYPE == ["REAL", "REAL", "REAL", "DD/MM/YY", "BOOLEAN"] + @test sort(collect(features.NAME)) == sort([:Length, :Width, :XEuler, :dateDeath, :isAlive]) end @testset "test mtg content" begin @test length(mtg) == 7 - @test typeof(mtg) == Node{NodeMTG,MutableNamedTuple} + @test typeof(mtg) == Node{NodeMTG,MultiScaleTreeGraph.ColumnarAttrs} @test node_id(mtg) == 1 - @test node_attributes(mtg)[:scales] == [0, 1, 2, 3, 3] - @test node_attributes(mtg)[:symbols] == ["Scene", "Individual", "Axis", "Internode", "Leaf"] + @test node_attributes(mtg) isa MultiScaleTreeGraph.ColumnarAttrs + @test mtg[:scales] == [0, 1, 2, 3, 3] + @test mtg[:symbols] == ["Scene", "Individual", "Axis", "Internode", "Leaf"] @test node_mtg(mtg) == NodeMTG("/", "Scene", 0, 0) - @test typeof(children(mtg)) <: Vector{Node{NodeMTG,MutableNamedTuple}} - @test typeof(mtg[1]) == Node{NodeMTG,MutableNamedTuple} - @test node_id(mtg[1]) == 2 - @test parent(mtg[1]) === mtg + @test typeof(children(mtg)) <: Vector{Node{NodeMTG,MultiScaleTreeGraph.ColumnarAttrs}} leaf_1 = get_node(mtg, 5) @test leaf_1[:Length] == 0.2 @test leaf_1[:Width] == 0.1 @test leaf_1[:isAlive] == false @test leaf_1[:dateDeath] == Date("2022-08-24") - - leaf_2 = get_node(mtg, 7) - @test leaf_2[:Length] == 0.2 - @test leaf_2[:Width] == 0.1 - @test leaf_2[:isAlive] == true - - Internode_2 = get_node(mtg, 6) - @test Internode_2[:Length] == 0.1 - @test Internode_2[:Width] == 0.02 - @test Internode_2[:isAlive] == true end @testset "test mtg mutation" begin - @test (node_attributes(mtg)[:scales] .= [0, 1, 2, 3, 4]) == [0, 1, 2, 3, 4] - @test MultiScaleTreeGraph.node_mtg!(mtg, MultiScaleTreeGraph.NodeMTG(:<, :Leaf, 2, 0)) == MultiScaleTreeGraph.NodeMTG(:<, :Leaf, 2, 0) - reparent!(mtg[1], nothing) - @test parent(mtg[1]) === nothing - node2 = mtg[1] - rechildren!(mtg, [node2]) - @test children(mtg) == [node2] -end - -@testset "test mtg with NamedTuples" begin - mtg = read_mtg("files/simple_plant.mtg", NamedTuple, NodeMTG) - - @test length(mtg) == 7 - @test typeof(mtg) == Node{NodeMTG,NamedTuple} - @test node_id(mtg) == 1 - @test node_mtg(mtg) == MultiScaleTreeGraph.NodeMTG(:/, :Scene, 0, 0) - @test typeof(children(mtg)) == Vector{Node{NodeMTG,NamedTuple}} - @test node_id(mtg[1]) == 2 - @test parent(mtg[1]) === mtg -end - -@testset "test mtg with Dict" begin - mtg = read_mtg("files/simple_plant.mtg", Dict, NodeMTG) - @test length(mtg) == 7 - @test typeof(mtg) == Node{MultiScaleTreeGraph.NodeMTG,Dict{Symbol,Any}} - @test node_id(mtg) == 1 - @test node_attributes(mtg) == Dict(:symbols => ["Scene", "Individual", "Axis", "Internode", "Leaf"], - :scales => [0, 1, 2, 3, 3], :description => mtg[:description]) - @test node_mtg(mtg) == MultiScaleTreeGraph.NodeMTG(:/, :Scene, 0, 0) - @test typeof(children(mtg)) == Vector{Node{NodeMTG,Dict{Symbol,Any}}} - @test node_id(mtg[1]) == 2 - @test parent(mtg[1]) === mtg -end - -@testset "test mtg with Dict: mutation" begin - mtg = read_mtg("files/simple_plant.mtg", Dict, NodeMTG) - @test (node_attributes(mtg)[:scales] = [0, 1, 2, 3, 4]) == [0, 1, 2, 3, 4] + @test (mtg[:scales] .= [0, 1, 2, 3, 4]) == [0, 1, 2, 3, 4] @test MultiScaleTreeGraph.node_mtg!(mtg, MultiScaleTreeGraph.NodeMTG(:<, :Leaf, 2, 0)) == MultiScaleTreeGraph.NodeMTG(:<, :Leaf, 2, 0) reparent!(mtg[1], nothing) @test parent(mtg[1]) === nothing - node2 = mtg[1] - rechildren!(mtg, [node2]) - @test children(mtg) == [node2] end - @testset "test mtg with empty lines" begin - mtg = read_mtg("files/simple_plant.mtg") + mtg1 = read_mtg("files/simple_plant.mtg") mtg2 = read_mtg("files/simple_plant-blanks.mtg") - - MTG1 = traverse(mtg) do x - (node_mtg(x), node_attributes(x)) - end - - MTG2 = traverse(mtg2) do x - (node_mtg(x), node_attributes(x)) - end - - @test MTG1 == MTG2 + @test traverse(mtg1, node_mtg) == traverse(mtg2, node_mtg) + @test traverse(mtg1, n -> Dict(pairs(node_attributes(n)))) == traverse(mtg2, n -> Dict(pairs(node_attributes(n)))) end @testset "mtg with several nodes in the same line" begin - mtg = read_mtg("files/simple_plant.mtg") + mtg1 = read_mtg("files/simple_plant.mtg") mtg2 = read_mtg("files/simple_plant-P1U1.mtg") - - MTG1 = traverse(mtg) do x - (node_mtg(x), node_attributes(x)) - end - - MTG2 = traverse(mtg2) do x - (node_mtg(x), node_attributes(x)) - end - - @test MTG1 == MTG2 + @test traverse(mtg1, node_mtg) == traverse(mtg2, node_mtg) + @test traverse(mtg1, n -> Dict(pairs(node_attributes(n)))) == traverse(mtg2, n -> Dict(pairs(node_attributes(n)))) end - @testset "mtg with no attributes" begin mtg = read_mtg("files/palm.mtg") - @test names(mtg) == [:scales, :description, :symbols] - traverse(mtg) do x - !MultiScaleTreeGraph.isroot(x) && @test node_attributes(x) == Dict{Symbol,Any}() + !MultiScaleTreeGraph.isroot(x) && @test isempty(node_attributes(x)) end end diff --git a/test/test-transform.jl b/test/test-transform.jl index 45fa7bd..754e503 100644 --- a/test/test-transform.jl +++ b/test/test-transform.jl @@ -1,5 +1,5 @@ @testset "transform!" begin - mtg = read_mtg("files/simple_plant.mtg", Dict) + mtg = read_mtg("files/simple_plant.mtg") # Test `function` form: transform!(mtg, x -> x[:x] = node_id(x)) diff --git a/test/test-traverse.jl b/test/test-traverse.jl index 886954d..9cb3624 100644 --- a/test/test-traverse.jl +++ b/test/test-traverse.jl @@ -1,5 +1,5 @@ @testset "traverse!" begin - mtg = read_mtg("files/simple_plant.mtg", Dict) + mtg = read_mtg("files/simple_plant.mtg") # Add a new attributes, x based on node field, y based on x and z using a function: traverse!(mtg, x -> x[:x] = length(node_id(x))) @test mtg[:x] == 1 @@ -11,7 +11,7 @@ end @testset "traverse" begin - mtg = read_mtg("files/simple_plant.mtg", Dict) + mtg = read_mtg("files/simple_plant.mtg") # Add a new attributes, x based on node field, y based on x and z using a function: node_ids = traverse(mtg, x -> node_id(x)) @test node_ids == collect(1:length(mtg)) @@ -24,7 +24,7 @@ end end @testset "traverse + filters" begin - mtg = read_mtg("files/simple_plant.mtg", Dict) + mtg = read_mtg("files/simple_plant.mtg") # Add a new attributes, x based on node field, y based on x and z using a function: @test traverse(mtg, x -> x, symbol="Internode") == [get_node(mtg, 4), get_node(mtg, 6)] @test traverse(mtg, x -> x, symbol=:Internode) == [get_node(mtg, 4), get_node(mtg, 6)] diff --git a/test/test-write_mtg.jl b/test/test-write_mtg.jl index 660fd8f..41429d8 100644 --- a/test/test-write_mtg.jl +++ b/test/test-write_mtg.jl @@ -18,12 +18,12 @@ mtg[:description] = nothing end -mtg = read_mtg("files/simple_plant.mtg", NamedTuple, NodeMTG) +mtg = read_mtg("files/simple_plant.mtg", NodeMTG) -@testset "Test write / read again: simple plant + NamedTuple" begin +@testset "Test write / read again: simple plant + NodeMTG" begin mtg2 = mktemp() do f, io write_mtg(f, mtg) - mtg2 = read_mtg(f, NamedTuple, NodeMTG) + mtg2 = read_mtg(f, NodeMTG) return mtg2 end @@ -88,4 +88,4 @@ mtg = read_mtg(file) @test descendants(mtg, :azimuth) == descendants(mtg2, :azimuth) @test traverse(mtg, symbol) == traverse(mtg2, symbol) @test traverse(mtg, index) == traverse(mtg2, index) -end \ No newline at end of file +end