diff --git a/.github/workflows/rust-tests.yml b/.github/workflows/rust-tests.yml new file mode 100644 index 0000000..5934c7d --- /dev/null +++ b/.github/workflows/rust-tests.yml @@ -0,0 +1,23 @@ +name: Rust Tests + +on: + push: + branches: + - '**' + workflow_dispatch: + +jobs: + test: + runs-on: ubuntu-latest + + defaults: + run: + working-directory: peptonizer_rust + + steps: + - uses: actions/checkout@v4 + + - uses: dtolnay/rust-toolchain@stable + + - name: Run cargo test + run: cargo test diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml new file mode 100644 index 0000000..55cafcc --- /dev/null +++ b/.github/workflows/wheels.yml @@ -0,0 +1,94 @@ +name: Build Wheels + +on: + push: + branches: + - '**' + paths-ignore: + - 'peptonizer_rust/dist/**' + + workflow_dispatch: + +permissions: + contents: write + +jobs: + build: + strategy: + fail-fast: false + matrix: + include: + - name: linux-x86 + runs-on: ubuntu-latest + python-version: "3.12" + - name: windows-x86 + runs-on: windows-latest + python-version: "3.12" + - name: macos-arm + runs-on: macos-latest + python-version: "3.12" + - name: macos-x86 + runs-on: macos-26-intel + python-version: "3.12" + - name: linux-arm + runs-on: ubuntu-24.04-arm + python-version: "3.12" + + runs-on: ${{ matrix.runs-on }} + + defaults: + run: + working-directory: peptonizer_rust + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - uses: dtolnay/rust-toolchain@stable + + - uses: PyO3/maturin-action@v1 + with: + command: build + manylinux: auto + working-directory: peptonizer_rust + args: --release --out dist -i python3.12 + + - name: Upload wheels + uses: actions/upload-artifact@v4 + with: + name: wheels-${{ matrix.name }} + path: peptonizer_rust/dist/*.whl + + commit-wheels: + needs: build + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Download 
all wheel artifacts + uses: actions/download-artifact@v4 + with: + path: artifacts + + - name: Copy wheels into repo + run: | + mkdir -p peptonizer_rust/dist + find artifacts -name "*.whl" -exec cp {} peptonizer_rust/dist/ \; + + - name: Commit wheels + run: | + git config user.name "github-actions" + git config user.email "github-actions@github.com" + + git add -f peptonizer_rust/dist/*.whl + + git commit -m "Update wheels [skip ci]" || echo "Generate wheel files" + + git push origin HEAD:${GITHUB_REF#refs/heads/} diff --git a/.gitignore b/.gitignore index cc94229..e036da0 100644 --- a/.gitignore +++ b/.gitignore @@ -83,7 +83,6 @@ __pycache__/ env/ build/ develop-eggs/ -dist/ downloads/ eggs/ .eggs/ @@ -96,3 +95,10 @@ wheels/ .installed.cfg *.egg *.tar.gz + +# Rust +peptonizer_rust/target/ +peptonizer_rust/Cargo.lock +peptonizer_rust/node_modules/ +peptonizer_rust/pkg/ +peptonizer_rust/yarn.lock diff --git a/peptonizer/peptonizer/__init__.py b/peptonizer/peptonizer/__init__.py deleted file mode 100644 index 0bbb884..0000000 --- a/peptonizer/peptonizer/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -from .zero_lookahead_belief_propagation import run_belief_propagation, ZeroLookaheadProgressListener -from .plot_results import plot_peptonizer_results -from .parsers import parse_peptide_tsv -from .fetch_peptides_and_filter_taxa import fetch_peptides_and_filter_taxa -from .weight_taxa import perform_taxa_weighing -from .factor_graph_generation import generate_pepgm_graph -from .extract_taxon_scores import extract_taxon_scores, clean_csv, parse_taxon_scores -from .analyse_grid_search import find_best_parameters, ParameterSet, compute_goodness -from .taxa_clustering import cluster_taxa_based_on_similarity -from .unipept_communicator import UnipeptCommunicator -from .ncbi_ranks import NCBI_RANKS diff --git a/peptonizer/peptonizer/analyse_grid_search.py b/peptonizer/peptonizer/analyse_grid_search.py deleted file mode 100644 index 3822bbe..0000000 --- 
a/peptonizer/peptonizer/analyse_grid_search.py +++ /dev/null @@ -1,59 +0,0 @@ -import pandas as pd -import rbo - -from scipy.stats import entropy -from typing import List, NamedTuple, Tuple - - -class ParameterSet(NamedTuple): - """ - Represents a set of parameters that have been used for a Peptonizer analysis to tweak the behaviour of the belief - propagation algorithm. - """ - alpha: float - beta: float - prior: float - - -def compute_goodness(taxa_scores: dict, taxid_weights: pd.DataFrame): - # Sort the taxa_scores dictionary by score in descending order - sorted_tax_ids = sorted(taxa_scores.items(), key=lambda item: item[1], reverse=True) - - # Extract the sorted tax IDs and their corresponding scores - sorted_ids = [tax_id for tax_id, score in sorted_tax_ids] - sorted_scores = [score for tax_id, score in sorted_tax_ids] - - # Compute entropy of the posterior probability distribution - computed_entropy = entropy(sorted_scores) - - # Compute the rank-based similarity between weight-sorted taxa and score-sorted ID results - return rbo.RankingSimilarity( - taxid_weights['HigherTaxa'].values, - [int(tax_id) for tax_id in sorted_ids] - ).rbo() * (1 / computed_entropy ** 2) - - -def find_best_parameters(results: List[Tuple[dict, ParameterSet]], taxid_weights: pd.DataFrame): - """ - Given the dataframes that have been run through the Belief Propagation Algorithm before and the matching parameter - sets, compute a goodness metric for each of these dataframes and returns the ParameterSet that resulted in the - highest goodness value. - - :param results: A list of tuples each holding two things: - 1. A dict containing taxa and their associated scores after running the belief propagation algorithm - 2. The parameter values that where used during the belief propagation algorithm for this set of taxa - :param taxid_weights: A dataframe containing taxa and their corresponding 'scaled weights', as computed by the] - weight_taxa step. 
- """ - params = [] - goodness_list = [] - - for taxa_scores, param_set in results: - goodness_list.append(compute_goodness(taxa_scores, taxid_weights)) - params.append(param_set) - - metrics_params = zip(goodness_list, params) - sorted_metric_param_pairs = sorted(metrics_params, reverse = True) - - # Return the ParameterSet that's associated with the highest computed goodness metric. - return sorted_metric_param_pairs[0][1] diff --git a/peptonizer/peptonizer/array_utils.py b/peptonizer/peptonizer/array_utils.py deleted file mode 100644 index 9bfa512..0000000 --- a/peptonizer/peptonizer/array_utils.py +++ /dev/null @@ -1,24 +0,0 @@ -import numpy as np -import numpy.typing as npt - -from scipy.special import logsumexp - - -def normalize(array: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]: - return array / np.sum(array) - - -# normalization of log probabilities -def log_normalize(array: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]: - try: - y = np.exp(array - logsumexp(array)) - - except FloatingPointError: - print(array) - y = np.exp(array - logsumexp(array)) - return y - - -def avoid_underflow(array: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]: - array[array < 1e-30] = 1e-30 - return array diff --git a/peptonizer/peptonizer/convolution_tree.py b/peptonizer/peptonizer/convolution_tree.py deleted file mode 100644 index 2994687..0000000 --- a/peptonizer/peptonizer/convolution_tree.py +++ /dev/null @@ -1,158 +0,0 @@ -import numpy as np -import numpy.typing as npt -import math - - -from . import array_utils -from typing import List, Optional -from scipy.signal import fftconvolve - - -# implementation of the convolution tree according to serang -# (see: https://bitbucket.org/orserang/convolutiontree/src/master/SplicoformSolver.py) -# not written by me!! 
-class CTNode: - def __init__(self, joint_above: npt.NDArray[np.float64]): - # normalize for greater precision - self.joint_above: npt.NDArray[np.float64] = array_utils.normalize(joint_above) - - self.left_parent: Optional[CTNode] = None - self.right_parent: Optional[CTNode] = None - - self.likelihood_below: Optional[npt.NDArray[np.float64]] = None - - # passing msges down: adding variables - @staticmethod - def create_count_node(lhs: 'CTNode', rhs: 'CTNode') -> 'CTNode': - # create a count node with joint prob for two parents above(vs the init if we have no parents) - joint_above = fftconvolve(lhs.joint_above, rhs.joint_above) - result = CTNode(joint_above) - - result.left_parent = lhs - result.right_parent = rhs - - return result - - # passing messages up : subtracting variables - def message_up(self, answer_size: int, other_joint_vector: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]: - assert self.likelihood_below is not None - - starting_point = len(other_joint_vector) - 1 - result = fftconvolve( - other_joint_vector[::-1], - self.likelihood_below - )[starting_point: starting_point + answer_size] - return array_utils.normalize(result) - - def message_up_left(self) -> npt.NDArray[np.float64]: - assert self.left_parent and self.left_parent.joint_above is not None - assert self.right_parent and self.right_parent.joint_above is not None - - return self.message_up( - len(self.left_parent.joint_above), self.right_parent.joint_above - ) - - def message_up_right(self) -> npt.NDArray[np.float64]: - assert self.left_parent and self.left_parent.joint_above is not None - assert self.right_parent and self.right_parent.joint_above is not None - - return self.message_up( - len(self.right_parent.joint_above), self.left_parent.joint_above - ) - - # once all messages are received - def posterior(self) -> npt.NDArray[np.float64]: - assert self.joint_above is not None - assert self.likelihood_below is not None - - return array_utils.normalize(self.joint_above * 
self.likelihood_below) - - def messages_up(self) -> npt.NDArray[np.float64]: - assert self.likelihood_below is not None - - return self.likelihood_below - - -class ConvolutionTree: - def __init__(self, n_to_shared_likelihoods: npt.NDArray[np.float64], proteins: List[npt.NDArray[np.float64]]): - self.n_to_shared_likelihoods: npt.NDArray[np.float64] = n_to_shared_likelihoods - self.log_length: int = int(math.ceil(np.log2(float(len(proteins))))) # length we need - self.all_layers: List[List[CTNode]] = [] - self.last_node: Optional[CTNode] = None - self.protein_layer: List[CTNode] = [] - - self._build_first_layer(proteins) - self._build_remaining_layers() - self._propagate_backward() - - self.n_proteins: int = len(proteins) - - def _build_first_layer(self, proteins: List[npt.NDArray[np.float64]]): - # construct first layer (of proteins) - layer: List[CTNode] = [] - for prot in proteins: - prot_node = CTNode(prot) - layer.append(prot_node) - - # pad with necessarily absent dummy variables so that the - # number of variables is a power of 2; this is not the most - # efficient method for this. because they are absent, they won't influence the - # total sum, and thus Ds. 
- for _ in range(0, 2 ** self.log_length - len(proteins)): - # this protein cannot be present, therefor set probability array to (0,1) - layer.append(CTNode(np.array([1, 0]))) # TODO change this order - - self.all_layers.append(layer) - - def _build_remaining_layers(self): - # construct layers of count nodes - for L in range(self.log_length): - # print('layers needed: ',int(len(self.allLayers[0])/(2**(L+1)))) - most_recent_layer = self.all_layers[-1] - layer: List[CTNode] = [] - for i in range(int(len(self.all_layers[0]) / (2 ** (L + 1)))): - left_parent = most_recent_layer[i * 2] - right_parent = most_recent_layer[i * 2 + 1] - count_node = CTNode.create_count_node(left_parent, right_parent) - layer.append(count_node) - - # add connection to remaining nodes (when layer above is not a power of 2) - self.all_layers.append(layer) - - # final node gets (Ds | N) multiplied into its likelihoodBelow - final_node = self.all_layers[-1][0] - # normalize for greater precision - final_node.likelihood_below = array_utils.normalize(self.n_to_shared_likelihoods) - self.last_node = final_node - - def _propagate_backward(self): - # propagate backward, setting likelihoodBelow. 
- # the loop has upper bound at logLength+1 - # because of the layer of proteins - for L in range(1, self.log_length + 1)[::-1]: - layer = self.all_layers[L] - - for i in range(len(layer)): - node = layer[i] - - left_parent = node.left_parent - right_parent = node.right_parent - - assert left_parent - assert right_parent - - left_parent.likelihood_below = node.message_up_left() - right_parent.likelihood_below = node.message_up_right() - - self.protein_layer = self.all_layers[0] - - def posterior_for_variable(self, prot_idx: int) -> npt.NDArray[np.float64]: - return self.protein_layer[prot_idx].posterior() - - def message_to_variable(self, prot_idx: int) -> npt.NDArray[np.float64]: - return self.protein_layer[prot_idx].messages_up() - - def message_to_shared_likelihood(self) -> npt.NDArray[np.float64]: - assert self.last_node - - return self.last_node.joint_above[0: (self.n_proteins + 1)] \ No newline at end of file diff --git a/peptonizer/peptonizer/extract_taxon_scores.py b/peptonizer/peptonizer/extract_taxon_scores.py deleted file mode 100644 index c7c20cc..0000000 --- a/peptonizer/peptonizer/extract_taxon_scores.py +++ /dev/null @@ -1,101 +0,0 @@ -import pandas as pd - -from os import linesep -from typing import Dict -from io import StringIO - -from .unipept_communicator import UnipeptCommunicator - - -def parse_taxon_scores(csv_content: str) -> Dict[int, float]: - """ - Read a CSV-file that's been produced by the PepGM algorithm, extract all the score values for all taxa and return - a dictionary mapping each taxon ID onto its associated score. - - Parameters - ---------- - csv_content: str, - A CSV-file that has been generated by running the PepGM algorithm. 
- """ - - # read csv using pandas - ids = pd.read_csv(StringIO(csv_content), names=["ID", "score", "type"]) - tax_ids = ids.loc[ids["type"] == "taxon"] - tax_ids = tax_ids.dropna() - - tax_ids.loc[:, "score"] = pd.to_numeric(tax_ids["score"], downcast="float") - tax_ids = tax_ids.sort_values("score") - - scores = tax_ids["score"].to_list() - - taxon_score_dict = dict() - - for (idx, tax_id) in enumerate(tax_ids["ID"]): - taxon_score_dict[int(tax_id)] = scores[idx] - - return taxon_score_dict - - -def extract_taxon_scores(csv_content: str) -> Dict[str, float]: - """ - Read a CSV-file that's been produced by the PepGM algorithm, extract all the score values for all taxa and return - a dictionary mapping each taxon name onto its associated score. - - Note: the big difference between this function and `parse_taxon_scores` is that `parse_taxon_scores` returns the - taxon IDs instead of taxon names. - - Parameters - ---------- - csv_content: str, - A CSV-file that has been generated by running the PepGM algorithm. - """ - - # read csv using pandas - taxon_scores = parse_taxon_scores(csv_content) - - unipept_communicator = UnipeptCommunicator() - - name_mapping = unipept_communicator.get_names_for_taxa([int(id) for id in taxon_scores.keys()]) - - taxon_score_dict = dict() - - for tax_id in taxon_scores.keys(): - taxon_score_dict[name_mapping[tax_id]] = taxon_scores[tax_id] - - return taxon_score_dict - - -def clean_csv(csv_content: str) -> str: - """ - Read a CSV-file that was produced by the PepGM algorithm and use it to produce a new CSV-file that only contains - the taxon-related information and scores. The string produced by this function can be written directly to a valid - CSV-file and contains three columns: taxon_name, taxon_id, score. - - Parameters - ---------- - csv_content: str, - A CSV-file that has been generated by running the PepGM algorithm. 
- """ - - # read csv using pandas - ids = pd.read_csv(StringIO(csv_content), names=["ID", "score", "type"]) - tax_ids = ids.loc[ids["type"] == "taxon"] - tax_ids = tax_ids.dropna() - - tax_ids.loc[:, "score"] = pd.to_numeric(tax_ids["score"], downcast="float") - tax_ids = tax_ids.sort_values("score") - - scores = tax_ids["score"].to_list() - - unipept_communicator = UnipeptCommunicator() - - name_mapping = unipept_communicator.get_names_for_taxa([int(id) for id in tax_ids["ID"]]) - - lines = [ - ",".join(["taxon_name", "taxon_id", "score"]) - ] - - for (idx, tax_id) in enumerate(tax_ids["ID"]): - lines.append(",".join([name_mapping[int(tax_id)], str(tax_id), str(scores[idx])])) - - return linesep.join(lines) diff --git a/peptonizer/peptonizer/factor_graph_generation.py b/peptonizer/peptonizer/factor_graph_generation.py deleted file mode 100644 index ad1ebbf..0000000 --- a/peptonizer/peptonizer/factor_graph_generation.py +++ /dev/null @@ -1,263 +0,0 @@ -from typing import List, Any - -import numpy as np -import networkx as nx -import pandas as pd - -from . import array_utils -from collections import namedtuple - - -class TaxonGraph(nx.Graph): - """ - Class with functions to construct a peptide-taxon graph using Entrez/NCBI mapping. 
- """ - def __init__(self): - super().__init__() - self.taxon_id_list = [] - - def create_from_taxa_weights(self, taxa_weights): - # drop rows that have an entry in HigherTaxa that appears only once - counts = taxa_weights["HigherTaxa"].value_counts() - taxa_weights = taxa_weights[ - taxa_weights["HigherTaxa"].isin(counts[counts > 1].index) - ] - new_graph = nx.from_pandas_edgelist(taxa_weights, "sequence", "HigherTaxa") - peptide_attributes = taxa_weights.apply( - lambda row: ( - row["sequence"], - { - "InitialBelief_0": 1 - float(row["score"]), - "InitialBelief_1": float(row["score"]), - "category": "peptide", - }, - ), - axis=1, - ) - taxa_attributes = taxa_weights.apply( - lambda row: (row["HigherTaxa"], {"category": "taxon"}), axis=1 - ) - intermediate_graph = nx.Graph() - intermediate_graph.add_edges_from(new_graph.edges) - intermediate_graph.add_nodes_from(peptide_attributes) - intermediate_graph.add_nodes_from(taxa_attributes) - - self.add_edges_from(intermediate_graph.edges) - self.add_nodes_from(peptide_attributes) - self.add_nodes_from(taxa_attributes) - - -class Factor: - # represents noisy OR cpds, has dimension n(parensports)xn(peptide states(=2)) - def __init__(self, cpd_array, variable_array): - Factor = namedtuple("Factor", ["array", "arrayLabels"]) - self.factor = Factor(cpd_array, variable_array) - - -# the variable and factor types might be unecessary as i do not need a lot of flexibility in the input - - -class FactorGraph(nx.Graph): - def __init__(self): - super().__init__() - - def construct_from_existing_graph(self, graph_data: nx.Graph): - node_list = list(graph_data.nodes(data=True)) - self.add_nodes_from(node_list) - for node in node_list: - # create noisy OR cpd per peptide - if node[1]["category"] == "peptide": - degree = graph_data.degree(node[0]) - neighbors = list(graph_data.neighbors(node[0])) - self.add_node(node[0] + " CPD", category="factor", ParentNumber=degree) - self.add_edges_from([(node[0] + " CPD", x) for x in neighbors]) - 
self.add_edge(node[0] + " CPD", node[0]) - - -# separate the connected components in the subgraph -def separate_subgraphs(graph_in, nodes_to_keep): - """ - separations of subgraphs (create news graphs for each subgraph) - - """ - new_graph = CTFactorGraph(nx.Graph()) - - # add nodes to keep - new_graph.add_nodes_from((n, graph_in.nodes[n]) for n in nodes_to_keep) - # add edges if they are present in original graph - new_graph.add_edges_from( - (n, nbr, d) - for n, nbrs in graph_in.adj.items() - if n in nodes_to_keep - for nbr, d in nbrs.items() - if nbr in nodes_to_keep - ) - - return new_graph # ListOfFactorGraphs - - -class CTFactorGraph(FactorGraph): - """' - This class is a networkx graph representing the full graphical model with all variables, CTrees, and Noisy-OR factors - """ - - def __init__(self, graph_in, graph_type="Taxons"): - """ - takes either a graph or a path to a graphML file as input - - """ - super().__init__() - - graph_types = ["Proteins", "Taxons"] - if graph_type not in graph_types: - raise ValueError("Invalid Graphtype. Expected one of: %s" % graph_types) - - if graph_type == "Taxons": - self.category = "taxon" - elif graph_type == "Protein": - self.category = "protein" - - if isinstance(graph_in, str): - graph_in = nx.parse_graphml(graph_in) - - # need these to create a new instance of a CT fractorgraph and not overwrite the previous graph - self.add_edges_from(graph_in.edges(data=True)) - self.add_nodes_from(graph_in.nodes(data=True)) - - def add_ct_nodes(self): - """ - When creating the CTGraph and not just reading from a previously saved graph format, use this command to add the CT nodes - """ - # create the convolution tree nodes and connect them in the graph - list_of_edge_add_list = [] - list_of_edge_remove_list = [] - list_of_prot_lists = [] - list_of_cts = [] - list_of_factors = [] - - for node in self.nodes(data=True): - # go through all factors with degree>2 and get their protein lists, then generate their conv. 
trees - if node[1]["category"] == "factor" and self.degree[node[0]] > 2: - prot_list = [] - - for neighbour in self.neighbors(node[0]): - neighbour_node = self.nodes[neighbour] - if neighbour_node["category"] == self.category: - prot_list.append(neighbour) - - list_of_cts.append([1]) - list_of_factors.append(node[0]) - list_of_prot_lists.append(prot_list) - list_of_edge_add_list.append( - [("CTree " + " ".join(str(prot_list)), x) for x in prot_list] - ) - list_of_edge_remove_list.append([(node[0], x) for x in prot_list]) - - # Fill all info into graph structure, should probably do this inside the loop before, so that i can initialize - # the messages - for i in range(len(list_of_cts)): - self.add_node( - "CTree " + " ".join(str(list_of_prot_lists[i])), - category="convolution_tree", - NumberOfParents=len(list_of_prot_lists[i]), - ) - self.add_edge( - "CTree " + " ".join(str(list_of_prot_lists[i])), - list_of_factors[i], - MessageLength=len(list_of_prot_lists[i]) + 1, - ) - self.add_edges_from(list_of_edge_add_list[i]) - self.remove_edges_from(list_of_edge_remove_list[i]) - - def save_to_graph_ml(self, filename): - nx.write_graphml(self, filename) - - def to_graph_ml(self): - return '\n'.join(nx.generate_graphml(self)) - - def compute_network_attributes(self): - """ - Computes nodes attributes using builtin networkx functions - Returns degree centrality, closeness centrality, betweenness centrality and eigen centrality - """ - degree_centrality = dict( - sorted(nx.degree_centrality(self).items(), key=lambda item: item[1]) - ) - closeness_centrality = dict( - sorted(nx.closeness_centrality(self).items(), key=lambda item: item[1]) - ) - betweenness_centrality = dict( - sorted(nx.betweenness_centrality(self).items(), key=lambda item: item[1]) - ) - eigen_centrality = dict( - nx.eigenvector_centrality(self).items(), key=lambda item: item[1] - ) - - return ( - degree_centrality, - closeness_centrality, - betweenness_centrality, - eigen_centrality, - ) - - def 
fill_in_factors(self, alpha, beta, regularized): - """fills in the noisy or Factors according to detection and error probabilities given""" - - for node in self.nodes(data=True): - # create noisy OR cpd per peptide - if node[1]["category"] == "factor": - # add noisyOR factors - degree = node[1]["ParentNumber"] - # pre-define the CPD array and fill it with the noisyOR values - cpd_array = np.full([2, degree + 1], 1 - alpha) - cpd_array_regularized = np.full([2, degree + 1], 1 - alpha) - exponent_array = np.arange(0, degree + 1) - divide_array = np.concatenate( - (np.asarray([1]), np.arange(1, degree + 1)) - ) - # regularize cpd priors to penalize higher number of parents - # log domain to avoid underflow - cpd_array[0, :] = np.power(cpd_array[0, :], exponent_array) * (1 - beta) - cpd_array_regularized[0, :] = np.divide( - np.power(cpd_array[0, :], exponent_array) * (1 - beta), divide_array - ) - # check0 = cpd_array_regularized[0,:] - # check1 = cpd_array[0,:] - cpd_array[1, :] = np.add(-cpd_array[0, :], 1) - cpd_array_regularized[1, :] = np.add(-cpd_array_regularized[0, :], 1) - cpd_array = np.transpose(array_utils.normalize(cpd_array)) - cpd_array_regularized = array_utils.avoid_underflow( - np.transpose(array_utils.normalize(cpd_array_regularized)) - ) - if regularized: - factor_to_add = Factor( - cpd_array_regularized, - ["placeholder", [node[0] + "0", node[0] + "1"]], - ) - else: - factor_to_add = Factor( - cpd_array, ["placeholder", [node[0] + "0", node[0] + "1"]] - ) - - # add factor & its edges to network as an extra node - nx.set_node_attributes(self, {node[0]: factor_to_add}, "InitialBelief") - - def fill_in_priors(self, prior): - """fills in the taxon priors according to the given prior""" - - for node in self.nodes(data=True): - # create noisy OR cpd per peptide - if node[1]["category"] == "taxon": - nx.set_node_attributes( - self, - {node[0]: {"InitialBelief_0": 1 - prior, "InitialBelief_1": prior}}, - ) - -def generate_pepgm_graph( - 
taxa_weights_data_frame: pd.DataFrame -) -> CTFactorGraph: - taxon_graph = TaxonGraph() - taxon_graph.create_from_taxa_weights(taxa_weights_data_frame) - factor_graph = FactorGraph() - factor_graph.construct_from_existing_graph(taxon_graph) - return CTFactorGraph(factor_graph, "Taxons") diff --git a/peptonizer/peptonizer/fetch_peptides_and_filter_taxa.py b/peptonizer/peptonizer/fetch_peptides_and_filter_taxa.py deleted file mode 100644 index 296a15b..0000000 --- a/peptonizer/peptonizer/fetch_peptides_and_filter_taxa.py +++ /dev/null @@ -1,28 +0,0 @@ -from typing import Dict, List - -from .unipept_communicator import UnipeptCommunicator - -def fetch_peptides_and_filter_taxa( - peptides: List[str], - taxonomy_query: str, - rank: str, - unipept_communicator: UnipeptCommunicator, -) -> Dict[str, List[int]]: - # First we retrieve all taxa associated with the given taxa - peptides_taxa = unipept_communicator.get_taxa_for_peptides(peptides) - - # Then, we make sure to filter the taxa and only keep those that are associated to the taxa of interest indicated by - # the user. 
- - taxon_query_ids = [int(item) for item in taxonomy_query.split(",")] - - # Retrieve all (in)direct children of the filter taxa provided by the user - taxa_filter = set(unipept_communicator.get_descendants_for_taxa(taxon_query_ids, rank)) - - # Compute the intersection of the taxa that should be retained and the original list of taxa - for (peptide, taxa) in peptides_taxa.items(): - peptides_taxa[peptide] = list(set(taxa) & taxa_filter) - - return peptides_taxa - - diff --git a/peptonizer/peptonizer/ncbi_ranks.py b/peptonizer/peptonizer/ncbi_ranks.py deleted file mode 100644 index 83c8d45..0000000 --- a/peptonizer/peptonizer/ncbi_ranks.py +++ /dev/null @@ -1,29 +0,0 @@ -NCBI_RANKS = [ - "domain", - "realm", - "subkingdom", - "superphylum", - "phylum", - "subphylum", - "superclass", - "class", - "subclass", - "superorder", - "order", - "suborder", - "infraorder", - "superfamily", - "family", - "subfamily", - "tribe", - "subtribe", - "genus", - "subgenus", - "species_group", - "species_subgroup", - "species", - "subspecies", - "strain", - "varietas", - "forma" -] diff --git a/peptonizer/peptonizer/parsers.py b/peptonizer/peptonizer/parsers.py deleted file mode 100644 index 6812723..0000000 --- a/peptonizer/peptonizer/parsers.py +++ /dev/null @@ -1,32 +0,0 @@ -from typing import Dict, Tuple - -def parse_peptide_tsv(tsv_content: str) -> Tuple[Dict[str, float], Dict[str, int]]: - """ - Parses a TSV string with columns 'peptide' and 'score', where peptides can occur multiple times. - - :param tsv_content: str, content of a TSV file with 'peptide' and 'score' columns. - :return: Tuple containing two dictionaries: - - pep_score: Maps peptide sequence to score (float). - - pep_psm_counts: Maps peptide sequence to the count of occurrences. 
- """ - pep_score: Dict[str, float] = {} - pep_psm_counts: Dict[str, int] = {} - - # Process each line in the TSV content, skipping the header - for line in tsv_content.splitlines()[1:]: - if line.strip() == "": # Skip empty lines - continue - - peptide, score = line.split("\t") - score = float(score) - - # Update pep_score dictionary - pep_score[peptide] = score # Assumes the latest score in the file should be used - - # Update pep_psm_counts dictionary - if peptide in pep_psm_counts: - pep_psm_counts[peptide] += 1 - else: - pep_psm_counts[peptide] = 1 - - return pep_score, pep_psm_counts diff --git a/peptonizer/peptonizer/plot_results.py b/peptonizer/peptonizer/plot_results.py deleted file mode 100644 index c7e1bcb..0000000 --- a/peptonizer/peptonizer/plot_results.py +++ /dev/null @@ -1,57 +0,0 @@ -import pandas as pd -import numpy as np -import matplotlib.pyplot as plt - -from .unipept_communicator import UnipeptCommunicator - - -def plot_peptonizer_results(input_file: str, output_file: str, number_of_taxa: int = 25): - """ - Read the results of a Peptonizer run from a CSV-file (denoted by the input_file argument) and write bar charts - representing these results to a PNG-file. - """ - assert input_file.lower().endswith(".csv"), "Input file should be a CSV." - assert output_file.lower().endswith(".png"), "Output file should be a PNG." 
- - # read csv using pandas - ids = pd.read_csv(input_file, names=["ID", "score", "type"]) - tax_ids = ids.loc[ids["type"] == "taxon"] - tax_ids = tax_ids.dropna() - - tax_ids.loc[:, "score"] = pd.to_numeric(tax_ids["score"], downcast="float") - tax_ids = tax_ids.sort_values("score") - taxa_check = tax_ids.ID.tolist() - - # translate taxids to scientific names - unipept_communicator = UnipeptCommunicator() - - taxa_name_dict = unipept_communicator.get_names_for_taxa([int(x) for x in tax_ids["ID"]]) - taxa_names = [taxa_name_dict[int(tax)] for tax in taxa_check] - scores = tax_ids["score"] - - # make the barplot - fig, ax = plt.subplots() - fig.set_size_inches(30, 15) - bars = ax.barh( - range(len(taxa_names[-number_of_taxa:])), - scores[-number_of_taxa:], - color="#283593", - ) - - ax.set_yticks(range(len(taxa_names[-number_of_taxa:]))) - ax.set_yticklabels(taxa_names[-number_of_taxa:], fontsize=24, color="#283593", fontweight="bold") - ax.tick_params(axis='y', which='major', pad=15) - plt.xlim((0, 1)) - plt.xlabel("Probability score", fontsize=35, fontweight="bold") - ax.xaxis.set_ticks(np.arange(0, 1.2, 0.2)) - ax.xaxis.set_ticklabels([0, 0.2, 0.4, 0.6, 0.8, 1.0], fontsize=35) - ax.spines["top"].set_visible(False) - ax.spines["right"].set_visible(False) - ax.spines["bottom"].set_visible(False) - ax.spines["left"].set_visible(False) - ax.bar_label(bars, fmt='{:,.3f}', fontsize=24, fontweight='bold', color='black', padding=20) - - fig.tight_layout() - - plt.savefig(output_file) - plt.close() diff --git a/peptonizer/peptonizer/pqdict.py b/peptonizer/peptonizer/pqdict.py deleted file mode 100644 index bbdb327..0000000 --- a/peptonizer/peptonizer/pqdict.py +++ /dev/null @@ -1,702 +0,0 @@ -"""Priority Queue Dictionary (pqdict). - -A Pythonic indexed priority queue. - -A dict-like heap queue to prioritize hashable objects while providing random -access and updatable priorities. 
Inspired by the ``heapq`` standard library -module, which was written by Kevin O'Connor and augmented by Tim Peters and -Raymond Hettinger. - -The priority queue is implemented as a binary heap of (key, priority value) -elements, which supports: - -- O(1) search for the item with highest priority - -- O(log n) removal of the item with highest priority - -- O(log n) insertion of a new item - -Additionally, an index maps each key to its element's location in the heap -and is kept up to date as the heap is manipulated. As a result, pqdict also -supports: - -- O(1) lookup of any item by key - -- O(log n) removal of any item - -- O(log n) updating of any item's priority level - -Documentation at . - -:copyright: (c) 2012-2024 by Nezar Abdennur. -:license: MIT, see LICENSE for more details. - -""" -from collections.abc import MutableMapping -from operator import gt, lt -from typing import ( - Any, - Callable, - Dict, - Iterable, - Iterator, - List, - Mapping, - NamedTuple, - Optional, - Tuple, - Type, - TypeVar, - Union, -) - -__version__ = "1.4.0" -__all__ = ["pqdict", "nlargest", "nsmallest"] - -DictInputs = Union[Mapping[Any, Any], Iterable[Tuple[Any, Any]]] -Tpqdict = TypeVar("Tpqdict", bound="pqdict") -PrioKeyFn = Callable[[Any], Any] -PrecedesFn = Callable[[Any, Any], bool] - - -class Empty(KeyError): - # Why specialize KeyError? Why not reuse queue.Empty? - # The Mapping protocol expects KeyError when popping from an empty map. - # This lets us distinguish between a key not in the map and an empty map. - pass - - -class Node(NamedTuple): - key: Any - value: Any - prio: Any - - -################### -# Heap algorithms # -################### -# The names of the heap operations in `heapq` (sift up/down) refer to the -# motion of the nodes being compared to, rather than the node being -# operated on as is usually done in textbooks (i.e. bubble down/up, -# instead). Here I use the sink/swim nomenclature from -# http://algs4.cs.princeton.edu/24pq/. 
The way I like to think of it, an -# item that is too "heavy" (low-priority) should sink down the tree, while -# one that is too "light" should float or swim up. - - -def _sink( - heap: List[Node], position: Dict[Any, int], precedes: PrecedesFn, top: int = 0 -) -> None: - # "Sink-to-the-bottom-then-swim" algorithm (Floyd, 1964) - # Tends to reduce the number of comparisons when inserting "heavy" - # items at the top, e.g. during a heap pop. See heapq for more details. - endpos = len(heap) - # Grab the top node - pos = top - node = heap[pos] - # Sift up a chain of child nodes - child_pos = 2 * pos + 1 - while child_pos < endpos: - # Choose the smaller child. - other_pos = child_pos + 1 - if other_pos < endpos and not precedes( - heap[child_pos].prio, heap[other_pos].prio - ): - child_pos = other_pos - child_node = heap[child_pos] - # Move it up one level. - heap[pos] = child_node - position[child_node.key] = pos - # Next level - pos = child_pos - child_pos = 2 * pos + 1 - # We are left with a "vacant" leaf. Put our node there and let it swim - # until it reaches its new resting place. - heap[pos] = node - position[node.key] = pos - _swim(heap, position, precedes, pos, top) - - -def _swim( - heap: List[Node], - position: Dict[Any, int], - precedes: PrecedesFn, - pos: int, - top: int = 0, -) -> None: - # Grab the node from its place - node = heap[pos] - # Sift parents down until we find a place where the node fits. - while pos > top: - parent_pos = (pos - 1) >> 1 - parent_node = heap[parent_pos] - if precedes(node.prio, parent_node.prio): - heap[pos] = parent_node - position[parent_node.key] = pos - pos = parent_pos - continue - break - # Put node in its new place - heap[pos] = node - position[node.key] = pos - - -def heapify(heap: List[Node], position: Dict[Any, int], precedes: PrecedesFn) -> None: - n = len(heap) - # No need to look at any leaf nodes. 
- for pos in reversed(range(n // 2)): - _sink(heap, position, precedes, pos) - - -def heaprepair( - heap: List[Node], position: Dict[Any, int], precedes: PrecedesFn, pos: int -) -> None: - # Repair the position of a modified node. - # Bubble up or down depending on values of parent and children. - parent_pos = (pos - 1) >> 1 - child_pos = 2 * pos + 1 - if parent_pos > -1 and precedes(heap[pos].prio, heap[parent_pos].prio): - _swim(heap, position, precedes, pos) - elif child_pos < len(heap): - other_pos = child_pos + 1 - if other_pos < len(heap) and not precedes( - heap[child_pos].prio, heap[other_pos].prio - ): - child_pos = other_pos - if precedes(heap[child_pos].prio, heap[pos].prio): - _sink(heap, position, precedes, pos) - - -def heappop( - heap: List[Node], position: Dict[Any, int], precedes: PrecedesFn, pos: int = 0 -) -> Node: - # Take the very last node and place it in the vacated spot. Let it - # sink or swim until it reaches its new resting place. - node_to_replace = heap[pos] - last = heap.pop() - if last is not node_to_replace: - heap[pos] = last - position[last.key] = pos - heaprepair(heap, position, precedes, pos) - del position[node_to_replace.key] - return node_to_replace - - -def heappush( - heap: List[Node], position: Dict[Any, int], precedes: PrecedesFn, node: Node -) -> None: - n = len(heap) - heap.append(node) - position[node.key] = n - _swim(heap, position, precedes, n) - - -def heapupdate( - heap: List[Node], position: Dict[Any, int], precedes: PrecedesFn, node: Node -) -> None: - pos = position[node.key] - heap[pos] = node - heaprepair(heap, position, precedes, pos) - - -def heappushpop( - heap: List[Node], position: Dict[Any, int], precedes: PrecedesFn, node: Node -) -> Node: - key = node.key - if heap and precedes(heap[0].prio, node.prio): - node, heap[0] = heap[0], node - position[key] = 0 - del position[node.key] - _sink(heap, position, precedes, 0) - return node - - -class pqdict(MutableMapping): - """A mutable dict/priority queue that 
maps hashable keys to priority values. - - As a priority queue, items can be added and the top-priority item can be - viewed or dequeued. In addition, arbitrary items may be retrieved, removed, - and have their priorities updated by key. - """ - - _heap: List[Node] - _position: Dict[Any, int] - - def __init__( - self, - data: Optional[DictInputs] = None, - key: Optional[PrioKeyFn] = None, - reverse: bool = False, - precedes: PrecedesFn = lt, - ): - """Create a new priority queue dictionary. - - Parameters - ---------- - data : mapping or iterable, optional - Input data, e.g. a dictionary or a sequence of items. - key : callable, optional - Optional priority key function to transform values into priority - keys for comparison. By default, the values are used directly as - priority keys and are not transformed. - reverse : bool, optional [default: ``False``] - If ``True``, *larger* priority keys give items *higher* priority. - Default is ``False``. - precedes : callable, optional [default: ``operator.lt``] - Function that determines precedence of a pair of priority keys. The - default comparator is ``operator.lt``, meaning *smaller* priority - keys give items *higher* priority. The callable must have the form - ``precedes(prio1, prio2) -> bool`` and return ``True`` if ``prio1`` - has higher priority than ``prio2``. Overrides ``reverse``. - - Notes - ----- - The default behavior is that of a **min**-priority queue, i.e. the item - with the *smallest* value is given *highest* priority. This behavior - can be reversed by specifying ``reverse=True`` or by providing a custom - precedence function via the ``precedes`` keyword argument. - Alternatively, use the explicit :meth:`pqdict.minpq` or - :meth:`pqdict.maxpq` class methods. 
- - """ - if reverse: - if precedes == lt: - precedes = gt - else: - raise ValueError("Got both `reverse=True` and a custom `precedes`.") - - if key is None or callable(key): - self._keyfn = key - else: - raise ValueError(f"`key` function must be a callable; got {key}") - - if callable(precedes): - self._precedes = precedes - else: - raise ValueError(f"`precedes` function must be a callable; got {precedes}") - - # The heap - self._heap = [] - - # The index - self._position = {} - - if data is not None: - self.update(data) - - @property - def precedes(self) -> PrecedesFn: - """Priority key precedence function.""" - return self._precedes - - @property - def keyfn(self) -> PrioKeyFn: - """Priority key function.""" - return self._keyfn if self._keyfn is not None else lambda x: x - - def __repr__(self) -> str: - """Return a string representation of the pqdict.""" - things = ", ".join([f"{node.key}: {node.value}" for node in self._heap]) - return f"{self.__class__.__name__}({things})" - - @classmethod - def minpq(cls: Type[Tpqdict], *args: Any, **kwargs: Any) -> Tpqdict: - """Create a pqdict with min-priority semantics: smallest value is highest. - - * ``pqdict.minpq()`` -> new empty pqdict with min-priority semantics - * ``pqdict.minpq(mapping)`` -> new minpq initialized from a mapping - * ``pqdict.minpq(iterable)`` -> new minpq initialized from an iterable of pairs - * ``pqdict.minpq(**kwargs)`` -> new minpq initialized with name=value pairs - """ - return cls(dict(*args, **kwargs), precedes=lt) - - @classmethod - def maxpq(cls: Type[Tpqdict], *args: Any, **kwargs: Any) -> Tpqdict: - """Create a pqdict with max-priority semantics: largest value is highest. 
- - * ``pqdict.maxpq()`` -> new empty pqdict with max-priority semantics - * ``pqdict.maxpq(mapping)`` -> new maxpq initialized from a mapping - * ``pqdict.maxpq(iterable)`` -> new maxpq initialized from an iterable of pairs - * ``pqdict.maxpq(**kwargs)`` -> new maxpq initialized with name=value pairs - """ - return cls(dict(*args, **kwargs), precedes=gt) - - ############ - # dict API # - ############ - __marker: object = object() - # __eq__ = MutableMapping.__eq__ - # __ne__ = MutableMapping.__ne__ - # keys = MutableMapping.keys - # values = MutableMapping.values - # items = MutableMapping.items - # get = MutableMapping.get - # clear = MutableMapping.clear - # update = MutableMapping.update - # setdefault = MutableMapping.setdefault - - @classmethod - def fromkeys( - cls: Type[Tpqdict], iterable: Iterable, value: Any, **kwargs: Any - ) -> Tpqdict: - """Return a new pqdict mapping keys from an iterable to the same value.""" - return cls(((k, value) for k in iterable), **kwargs) - - def __len__(self) -> int: - """Return number of items in the pqdict.""" - return len(self._heap) - - def __contains__(self, key: Any) -> bool: - """Return ``True`` if key is in the pqdict.""" - return key in self._position - - def __iter__(self) -> Iterator[Any]: - """Return an iterator over the keys of the pqdict. - - The order of iteration is arbitrary! Use ``popkeys`` to iterate over - keys in priority order. - """ - for node in self._heap: - yield node.key - - def __getitem__(self, key: Any) -> Any: - """Return the priority value of ``key``. - - Raises a ``KeyError`` if not in the pqdict. - """ - return self._heap[self._position[key]].value # raises KeyError - - def __setitem__(self, key: Any, value: Any) -> None: - """Assign a priority value to ``key``. - - If ``key`` is already in the pqdict, its priority value is updated. 
- """ - prio = self._keyfn(value) if self._keyfn else value - node = Node(key, value, prio) - if key in self._position: - heapupdate(self._heap, self._position, self._precedes, node) - else: - heappush(self._heap, self._position, self._precedes, node) - - def __delitem__(self, key: Any) -> None: - """Remove item. - - Raises a ``KeyError`` if key is not in the pqdict. - """ - if key not in self._position: - raise KeyError(key) - heappop(self._heap, self._position, self._precedes, self._position[key]) - - def copy(self: Tpqdict) -> Tpqdict: - """Return a shallow copy of a pqdict.""" - other = self.__class__(key=self._keyfn, precedes=self._precedes) - other._position = self._position.copy() - other._heap = self._heap[:] - return other - - def pop( - self, - key: Any = __marker, - default: Any = __marker, - ) -> Any: - """Hybrid pop method. - - With ``key``, perform a dictionary pop: - - * If ``key`` is in the pqdict, remove the item and return its - **value**. - * If ``key`` is not in the pqdict, return ``default`` if provided, - otherwise raise a ``KeyError``. - - Without ``key``, perform a priority queue pop: - - * Remove the top item and return its **key**. - * If the pqdict is empty, return ``default`` if provided, otherwise - raise ``Empty``. - """ - # pq semantics: remove and return top *key* (value is discarded) - if key is self.__marker: - if self._heap: - return heappop(self._heap, self._position, self._precedes).key - elif default is self.__marker: - raise Empty("pqdict is empty") - else: - return default - # dict semantics: remove and return *value* mapped from key - elif key in self._position: - return heappop( - self._heap, self._position, self._precedes, self._position[key] - ).value - elif default is self.__marker: - raise KeyError(key) - else: - return default - - ###################### - # Priority Queue API # - ###################### - def top(self, default: Any = __marker) -> Any: - """Return the key of the item with highest priority. 
- - If ``default`` is provided and pqdict is empty, then return ``default``, - otherwise raise ``Empty``. - """ - if self._heap: - return self._heap[0].key - elif default is self.__marker: - raise Empty("pqdict is empty") - else: - return default - - def topvalue(self, default: Any = __marker) -> Any: - """Return the value of the item with highest priority. - - If ``default`` is provided and pqdict is empty, then return ``default``, - otherwise raise ``Empty``. - """ - if self._heap: - return self._heap[0].value - elif default is self.__marker: - raise Empty("pqdict is empty") - else: - return default - - def topitem(self, default: Any = __marker) -> Tuple[Any, Any]: - """Return the item with highest priority. - - Raises ``Empty`` if pqdict is empty. - """ - if self._heap: - node = self._heap[0] - return node.key, node.value - elif default is self.__marker: - raise Empty("pqdict is empty") - else: - return default - - def popvalue(self, default: Any = __marker) -> Any: - """Remove and return the value of the item with highest priority. - - If ``default`` is provided and pqdict is empty, then return ``default``, - otherwise raise ``Empty``. - """ - if self._heap: - return heappop(self._heap, self._position, self._precedes).value - elif default is self.__marker: - raise Empty("pqdict is empty") - else: - return default - - def popitem(self, default: Any = __marker) -> Tuple[Any, Any]: - """Remove and return the item with highest priority. - - Raises ``Empty`` if pqdict is empty. - """ - if self._heap: - node = heappop(self._heap, self._position, self._precedes) - return node.key, node.value - elif default is self.__marker: - raise Empty("pqdict is empty") - else: - return default - - def additem(self, key: Any, value: Any) -> None: - """Add a new item. - - Raises ``KeyError`` if key is already in the pqdict. 
- """ - if key in self._position: - raise KeyError(f"{key} is already in the queue") - prio = self._keyfn(value) if self._keyfn else value - node = Node(key, value, prio) - heappush(self._heap, self._position, self._precedes, node) - - def updateitem(self, key: Any, new_val: Any) -> None: - """Update the priority value of an existing item. - - Raises ``KeyError`` if key is not in the pqdict. - """ - if key not in self._position: - raise KeyError(key) - prio = self._keyfn(new_val) if self._keyfn else new_val - node = Node(key, new_val, prio) - heapupdate(self._heap, self._position, self._precedes, node) - - def pushpopitem(self, key: Any, value: Any) -> Tuple[Any, Any]: - """Insert a new item and return the top-priority item. - - Equivalent to inserting a new item followed by removing the top - priority item, but faster. Raises ``KeyError`` if the new key is - already in the pqdict. - """ - if key in self._position: - raise KeyError(f"{key} is already in the queue") - prio = self._keyfn(value) if self._keyfn else value - node = heappushpop( - self._heap, self._position, self._precedes, Node(key, value, prio) - ) - return node.key, node.value - - def replace_key(self, key: Any, new_key: Any) -> None: - """Replace the key of an existing heap node in place. - - Raises ``KeyError`` if the key to replace does not exist or if the new - key is already in the pqdict. - """ - if new_key in self._position: - raise KeyError(f"{new_key} is already in the queue") - pos = self._position.pop(key) # raises appropriate KeyError - self._position[new_key] = pos - node = self._heap[pos] - self._heap[pos] = Node(new_key, node.value, node.prio) - - def swap_priority(self, key1: Any, key2: Any) -> None: - """Fast way to swap the priority level of two items in the pqdict. - - Raises ``KeyError`` if either key does not exist. 
- """ - heap = self._heap - position = self._position - if key1 not in position: - raise KeyError(key1) - if key2 not in position: - raise KeyError(key2) - pos1, pos2 = position[key1], position[key2] - node1, node2 = heap[pos1], heap[pos2] - heap[pos1] = Node(key2, node1.value, node1.prio) - heap[pos2] = Node(key1, node2.value, node2.prio) - position[key1], position[key2] = pos2, pos1 - - def popkeys(self) -> Iterator[Any]: - """Remove items, returning keys in descending order of priority rank.""" - try: - while True: - yield self.pop() - except Empty: - return - - def popvalues(self) -> Iterator[Any]: - """Remove items, returning values in descending order of priority rank.""" - try: - while True: - yield self.popvalue() - except Empty: - return - - def popitems(self) -> Iterator[Tuple[Any, Any]]: - """Remove and return items in descending order of priority rank.""" - try: - while True: - yield self.popitem() - except Empty: - return - - def heapify(self, key: Any = __marker) -> None: - """Repair a broken heap. - - If a change in a single, mutable value caused the break, you can - provide ``key`` to repair the heap by relocating that item. - """ - if key is self.__marker: - heapify(self._heap, self._position, self._precedes) - else: - if key not in self._position: - raise KeyError(key) - heaprepair(self._heap, self._position, self._precedes, self._position[key]) - - -############# -# Functions # -############# - - -def nlargest(n: int, mapping: Mapping, key: Optional[PrioKeyFn] = None): - """Return the n keys associated with the largest values in a mapping. - - Takes a mapping and returns the n keys associated with the largest values - in descending order. If the mapping has fewer than n items, all its keys - are returned. 
- - Parameters - ---------- - n : int - The number of keys associated with the largest values - in descending order - mapping : Mapping - A dict-like object - key : callable, optional - Optional priority key function to transform values into priority keys - for sorting. By default, the values are not transformed. - - Returns - ------- - list of up to n keys from the mapping associated with the largest values - - Notes - ----- - This function is equivalent to: - - >>> [item[0] for item in heapq.nlargest(n, mapping.items(), lambda x: x[1])] - - """ - it = iter(mapping.items()) - pq = pqdict(key=key, precedes=lt) - try: - for _ in range(n): - pq.additem(*next(it)) - except StopIteration: - pass - try: - while it: - pq.pushpopitem(*next(it)) - except StopIteration: - pass - out = list(pq.popkeys()) - out.reverse() - return out - - -def nsmallest(n: int, mapping: Mapping, key: Optional[PrioKeyFn] = None): - """Return the n keys associated with the smallest values in a mapping. - - Takes a mapping and returns the n keys associated with the smallest values - in ascending order. If the mapping has fewer than n items, all its keys are - returned. - - Parameters - ---------- - n : int - The number of keys associated with the smallest values - in ascending order - mapping : Mapping - A dict-like object - key : callable, optional - Optional priority key function to transform values into priority keys - for sorting. By default, the values are not transformed. 
- - Returns - ------- - list of up to n keys from the mapping associated with the smallest values - - Notes - ----- - This function is equivalent to: - - >>> [item[0] for item in heapq.nsmallest(n, mapping.items(), lambda x: x[1])] - - """ - it = iter(mapping.items()) - pq = pqdict(key=key, precedes=gt) - try: - for _ in range(n): - pq.additem(*next(it)) - except StopIteration: - pass - try: - while it: - pq.pushpopitem(*next(it)) - except StopIteration: - pass - out = list(pq.popkeys()) - out.reverse() - return out diff --git a/peptonizer/peptonizer/request_manager.py b/peptonizer/peptonizer/request_manager.py deleted file mode 100644 index 3e698c1..0000000 --- a/peptonizer/peptonizer/request_manager.py +++ /dev/null @@ -1,16 +0,0 @@ -import requests -import sys - - -class RequestManager: - @staticmethod - def perform_post_request(url, payload): - req = requests.Request('POST', url=url, json=payload) - prepared = req.prepare() - # This header needs to be removed since most browsers do not allow us to set it manually, this is only - # required when executing the peptonizer with Pyodide. 
- if sys.platform == 'emscripten': - del prepared.headers["Content-length"] - # Perform the HTTP POST request - s = requests.Session() - return s.send(prepared) diff --git a/peptonizer/peptonizer/taxa_clustering.py b/peptonizer/peptonizer/taxa_clustering.py deleted file mode 100644 index de25967..0000000 --- a/peptonizer/peptonizer/taxa_clustering.py +++ /dev/null @@ -1,73 +0,0 @@ -import networkx as nx -import pandas as pd - - -def get_peptides_per_taxon(graph_in: nx.Graph): - peptidome_dict = {} - for node in graph_in.nodes(data = True): - if node[1]['category'] == 'taxon' and node[0]: - neighbors = graph_in.neighbors(node[0]) - peptidome_dict.update({node[0]: [n[:-4] for n in neighbors]}) - return peptidome_dict - - -def compute_detected_peptidome_similarity(peptidome_dict): - sim_matrix_max = [] - taxa1 = [] - taxa2 = [] - for taxon1 in peptidome_dict.keys(): - taxa1.append(taxon1) - sim_matrix_max_row = [] - for taxon2 in peptidome_dict.keys(): - taxa2.append(taxon2) - peptides1 = set(peptidome_dict[taxon1]) - peptides2 = set(peptidome_dict[taxon2]) - shared = len(peptides1.intersection(peptides2)) - try: - sim = shared / (len(peptides2)) - except: - sim = 0 - - sim_matrix_max_row.append(sim) - - sim_matrix_max.append(sim_matrix_max_row) - - similarity_frame = pd.DataFrame(sim_matrix_max, columns = taxa1, index = taxa1) - return similarity_frame - - -def cluster_taxa_based_on_similarity( - pepgm_graph: nx.Graph, - taxid_weights: pd.DataFrame, - similarity_threshold: float -): - peptidome_dict = get_peptides_per_taxon(pepgm_graph) - similarities = compute_detected_peptidome_similarity(peptidome_dict) - - taxid_weights = taxid_weights.loc[taxid_weights.HigherTaxa.isin([int(x) for x in similarities.index.tolist()])] - - list_of_weight_sorted_taxa = taxid_weights.HigherTaxa.tolist() - taxa_cluster_list = [] - - cluster_heads = [taxid_weights.HigherTaxa[0]] - - while list_of_weight_sorted_taxa: - taxon1 = list_of_weight_sorted_taxa[0] - cluster_list = [] - 
cluster_heads.append(list_of_weight_sorted_taxa[0]) - - for taxon2 in taxid_weights.HigherTaxa: - if similarities[str(int(taxon2))][str(int(taxon1))] > similarity_threshold: - cluster_list.append(taxon2) - if taxon2 in list_of_weight_sorted_taxa: - list_of_weight_sorted_taxa.remove(taxon2) - - taxa_cluster_list.append(cluster_list) - - - clustered_weight_sorted_taxa = taxid_weights.loc[taxid_weights.HigherTaxa.isin(cluster_heads)] - clustered_weight_sorted_taxa2 = taxid_weights.loc[taxid_weights.HigherTaxa.isin([clustered_taxa[1:] for clustered_taxa in taxa_cluster_list])] - clustered_weight_sorted_taxa = pd.concat([clustered_weight_sorted_taxa, clustered_weight_sorted_taxa2]) - clustered_weight_sorted_taxa['Clustermembers'] = taxa_cluster_list - - return clustered_weight_sorted_taxa diff --git a/peptonizer/peptonizer/unipept_communicator.py b/peptonizer/peptonizer/unipept_communicator.py deleted file mode 100644 index f669bf8..0000000 --- a/peptonizer/peptonizer/unipept_communicator.py +++ /dev/null @@ -1,207 +0,0 @@ -from typing import Dict, List - -from .ncbi_ranks import NCBI_RANKS -from .request_manager import RequestManager - -class CommunicationException(Exception): - pass - -class UnipeptCommunicator: - """ - Class responsible for retrieving data from Unipept. - - The default implementation in this class is used by the Python version of the Peptonizer, but can be overridden in - case an alternative implementation is desired. - - :author: Pieter Verschaffelt - """ - - UNIPEPT_URL = "https://api.unipept.ugent.be" - UNIPEPT_PEPT2FILTERED_ENDPOINT = "/api/v2/pept2taxa" - UNIPEPT_TAXONOMY_ENDPOINT = "/api/v2/taxonomy" - - UNIPEPT_PEPTIDES_BATCH_SIZE = 2000 - TAXONOMY_ENDPOINT_BATCH_SIZE = 100 - - # Static cache to store previously retrieved lineages - lineage_cache = {} - - def get_taxa_for_peptides(self, peptides: List[str]) -> Dict[str, List[int]]: - """ - Queries Unipept and returns all the taxa that are associated with the given list of peptides. 
For each peptide - in the input, an entry in the output dictionary is created, which points to the taxon ids associated with this - peptide. - - :param peptides: List of peptide sequences for which all associated taxa should be queried. - - :raises CommunicationException: If the Unipept API server responds with an error, or if something goes wrong - with the network. - - :return: Dictionary mapping each peptide from the input list onto all of its associated taxa IDs. - """ - url = UnipeptCommunicator.UNIPEPT_URL + UnipeptCommunicator.UNIPEPT_PEPT2FILTERED_ENDPOINT - - output = dict() - - # Split the peptides into batches of a predefined size - for i in range(0, len(peptides), UnipeptCommunicator.UNIPEPT_PEPTIDES_BATCH_SIZE): - batch = peptides[i:i+UnipeptCommunicator.UNIPEPT_PEPTIDES_BATCH_SIZE] - - # Prepare the request payload - payload = { - "input": batch, - "compact": True, - "tryptic": True - } - - # Perform the HTTP POST request - response = RequestManager.perform_post_request(url, payload) - - # Check if the request was successful - if response.status_code == 200: - data = response.json() - for peptide_data in data: - original_taxa = peptide_data.get("taxa", []) - output[peptide_data["peptide"]] = original_taxa - else: - raise CommunicationException(f"Status code returned by Unipept API was {response.status_code}") - - return output - - def get_descendants_for_taxa(self, target_taxa: List[int], descendants_rank: str) -> List[int]: - """ - Returns a list of all taxon IDs that are descendants of the given taxa in `target_taxa`. - - :param target_taxa: A list of taxon IDs for which all descendants at a specific NCBI rank (and lower) should be - retrieved. - :param descendants_rank: The maximum rank that each of the descendants should have in the NCBI taxonomy. - All descendants that are defined at this rank or deeper are reported. 
- - :raises CommunicationException: If the Unipept API server responds with an error, or if something goes wrong - with the network. - - :return: A list of taxon IDs that meet the given rank criteria. - """ - url = UnipeptCommunicator.UNIPEPT_URL + UnipeptCommunicator.UNIPEPT_TAXONOMY_ENDPOINT - all_descendants = set() # Using a set to avoid duplicates - - # We need to get all children at the requested level, AND at lower levels. That's what we're using the ranks array - # for. - rank_idx = NCBI_RANKS.index(descendants_rank) - descendants_ranks = NCBI_RANKS[rank_idx:] - - # Split the target taxa into batches of 15 - for i in range(0, len(target_taxa), UnipeptCommunicator.TAXONOMY_ENDPOINT_BATCH_SIZE): - batch = target_taxa[i:i+UnipeptCommunicator.TAXONOMY_ENDPOINT_BATCH_SIZE] - - # Prepare the request payload - payload = { - "input": batch, - "descendants": True, - "descendants_ranks": descendants_ranks - } - - # Perform the HTTP POST request - response = RequestManager.perform_post_request(url, payload) - - # Check if the request was successful - if response.status_code == 200: - data = response.json() - - # Extract descendants from each item in the response - for item in data: - all_descendants.update(item.get("descendants", [])) - else: - raise CommunicationException(f"Status code returned by Unipept API was {response.status_code}") - - # Convert the set of descendants back to a list and return it - return list(all_descendants) - - def get_lineages_for_taxa(self, target_taxa: List[int]) -> Dict[int, List[int | None]]: - """ - Retrieve the lineage array for each of the taxon IDs in the provided target_taxa list. A lineage array is an - array containing exactly 27 entries. Each position in the array corresponds to either the taxon ID of the parent - node at that rank, or None if no parent is defined for this taxon at that rank. - - :param target_taxa: A list of taxon IDs for which the lineage arrays need to be retrieved. 
- - :raises CommunicationException: If the Unipept API server responds with an error, or if something goes wrong - with the network. - - :return: A dictionary that contains an entry for every taxon ID from the input, mapped onto its lineage array. - """ - url = UnipeptCommunicator.UNIPEPT_URL + UnipeptCommunicator.UNIPEPT_TAXONOMY_ENDPOINT - - # Remove duplicates from input and filter those already cached - target_taxa = set(target_taxa) - lineages = dict() - - # Prepare a list of taxa that are not yet in the cache - taxa_to_request = [taxon for taxon in target_taxa if taxon not in self.lineage_cache] - - # Fetch lineages from the API for taxa not in the cache - for i in range(0, len(taxa_to_request), UnipeptCommunicator.TAXONOMY_ENDPOINT_BATCH_SIZE): - batch = taxa_to_request[i:i+UnipeptCommunicator.TAXONOMY_ENDPOINT_BATCH_SIZE] - - payload = { - "input": batch, - "extra": True - } - - # Perform the HTTP POST request - response = RequestManager.perform_post_request(url, payload) - - if response.status_code == 200: - data = response.json() - for item in data: - lineage = [item.get(rank + "_id") for rank in NCBI_RANKS] - taxon_id = item["taxon_id"] - # Cache the retrieved lineage - self.lineage_cache[taxon_id] = lineage - else: - raise CommunicationException(f"Status code returned by Unipept API was {response.status_code}") - - # Collect results from the cache for all requested taxa - for taxon in target_taxa: - lineages[taxon] = self.lineage_cache.get(taxon) - - return lineages - - def get_names_for_taxa(self, target_taxa: List[int]) -> Dict[int, str]: - """ - Returns a mapping from taxon ID to taxon name for all taxa that have been provided to this function. - - :param target_taxa: A list of taxon IDs for which all corresponding taxon names should be retrieved. - - :raises CommunicationException: If the Unipept API server responds with an error, or if something goes wrong - with the network. - - :return: A dictionary mapping taxon IDs to taxon names. 
- """ - url = UnipeptCommunicator.UNIPEPT_URL + UnipeptCommunicator.UNIPEPT_TAXONOMY_ENDPOINT - - output = dict() - - # Split the target taxa into batches of 15 - for i in range(0, len(target_taxa), UnipeptCommunicator.TAXONOMY_ENDPOINT_BATCH_SIZE): - batch = target_taxa[i:i+UnipeptCommunicator.TAXONOMY_ENDPOINT_BATCH_SIZE] - - # Prepare the request payload - payload = { - "input": batch - } - - # Perform the HTTP POST request - response = RequestManager.perform_post_request(url, payload) - - # Check if the request was successful - if response.status_code == 200: - data = response.json() - - # Extract descendants from each item in the response - for item in data: - output[item.get("taxon_id")] = item.get("taxon_name") - else: - raise CommunicationException(f"Status code returned by Unipept API was {response.status_code}") - - return output diff --git a/peptonizer/peptonizer/weight_taxa.py b/peptonizer/peptonizer/weight_taxa.py deleted file mode 100644 index 7594c56..0000000 --- a/peptonizer/peptonizer/weight_taxa.py +++ /dev/null @@ -1,227 +0,0 @@ -from typing import List, Dict, Tuple - -import numpy as np -import pandas as pd - -from .unipept_communicator import UnipeptCommunicator -from .ncbi_ranks import NCBI_RANKS - - -def get_lineage_at_specified_rank( - tax_ids: List[int], - taxa_rank: str, - lineages: Dict[int, List[int | None]] -) -> List[int]: - """ - Returns the taxon ID of the specified rank in the lineage for all taxa IDs given as arguments. - - For example, given a taxon ID at the strain level and "species" as the value for the `taxa_rank` argument, - this function will return the taxon ID at the species level for the input taxon ID. - - :param tax_ids: List of taxon IDs to get the lineage of. - :param taxa_rank: Rank at which you want to pin the taxa. - :param lineages: A dictionary mapping taxon IDs to their lineages. 
These can be retrieved from the Unipept API (not - by this function) - - :return: List of taxon IDs at the specified rank for each input taxon ID. - """ - - # Get the index of the NCBI rank that we're interested in. This index is required to extract the taxon IDs from the - # correct place in the lineage. - rank_idx = NCBI_RANKS.index(taxa_rank) - - return [lineages[tax][rank_idx] for tax in tax_ids] - - -def weighted_random_sample(peptide_taxa: Dict[str, List[int]], n: int) -> Dict[str, List[int]]: - """ - Randomly select n pairs from the provided peptide_taxa dictionary. The chance for each peptide to be selected, - depends on the amount of taxa that are associated to this peptide. The more taxa, the lower the information - content of this peptide, and the lower the chance of being selected. - - :param peptide_taxa: A dictionary mapping peptide sequences onto the taxon IDs that they're associated with. - :param n: The amount of peptide-taxa pairs that should be selected from the provided peptide_taxa dictionary. - :return: A new peptide_taxa dictionary object containing only the n selected pairs. 
- """ - - # Calculate weights based on the length of the taxa array for each peptide - weights = np.array([1 / len(taxa) if taxa else 0 for taxa in peptide_taxa.values()]) - - # Normalize weights - total_weight = np.sum(weights) - if total_weight == 0: - raise ValueError("All objects have zero weight, cannot sample.") - - normalized_weights = weights / total_weight - - # Perform weighted sampling without replacement (so no items are selected more than once) - peptides = list(peptide_taxa.keys()) - # Fix the seed for the random generator such that we get reproducible results - rng = np.random.default_rng(1234) - sampled_indices = rng.choice( - len(peptides), - size=min(np.count_nonzero(normalized_weights), n), - replace=False, - p=normalized_weights - ) - - output = dict() - - # Retrieve sampled objects - for sampled_index in sampled_indices: - sampled_peptide = peptides[sampled_index] - output[sampled_peptide] = peptide_taxa[sampled_peptide] - - return output - - -def normalize_taxa( - peptide_taxa: Dict[str, List[int]], - taxa_rank: str, - unipept_communicator: UnipeptCommunicator -) -> Dict[str, List[int]]: - """ - Map all taxon IDs that are found (as keys) in the given peptide_taxa dictionary to the parent or child at taxa_rank. - - :param peptide_taxa: A dictionary that maps peptide sequences onto a list of taxon IDs that are associated to this - peptide. - :param taxa_rank: The NCBI taxon rank at which the taxon IDs should be mapped. Must be a valid NCBI rank that's - supported by Unipept. - :param unipept_communicator: Object that performs all network communication with the Unipept API. - :return: The same dictionary that was provided as input, but then with the modified taxa. The returned object is the - same as the input object. 
- """ - - # Retrieve the lineage data in larger batches to reduce the amount of requests that have to be made to Unipept - # Measure time for summing the peptide_taxa values - all_taxa = set() - for taxa in peptide_taxa.values(): - all_taxa.update(taxa) - lineages = unipept_communicator.get_lineages_for_taxa(list(all_taxa)) - - # Map all taxa onto the rank specified by the user - for (peptide, taxa) in peptide_taxa.items(): - taxa = [taxon for taxon in taxa if lineages[taxon] is not None] - if len(taxa) > 0: - peptide_taxa[peptide] = list(set(get_lineage_at_specified_rank(taxa, taxa_rank, lineages))) - return peptide_taxa - - -def perform_taxa_weighing( - peptide_taxa: Dict[str, List[int]], - pep_scores: Dict[str, float], - pep_psm_counts: Dict[str, int], - max_taxa: int, - unipept_communicator: UnipeptCommunicator, - taxa_rank="species" -) -> Tuple[pd.DataFrame, pd.DataFrame]: - """ - Weight inferred taxa based on their (1) degeneracy and (2) proteome size. - - :param peptide_taxa: List of taxa associated with each peptide. - :param pep_scores: Scores associated with each peptide. - :param pep_psm_counts: PSM counts associated with each peptide. - :param max_taxa: Maximum number of taxa to include in the final graphical model. - :param unipept_communicator: Object that performs all network communication with the Unipept API. - :param taxa_rank: NCBI rank at which the Peptonizer analysis should be performed. Should be a rank that is - supported by Unipept. - - :return: Two dataframes: - 1) a dataframe containing all peptide sequences mapped onto it's normalized taxa and the weight computed by this - function. - 2) a dataframe containing each normalized taxon ID, it's scaled weight, and a column indicating if the taxon is - unique (i.e. associated to only one peptide) or not. 
- """ - print("Started mapping all taxon ids to the specified rank...") - peptide_taxa = normalize_taxa(peptide_taxa, taxa_rank, unipept_communicator) - peptide_taxa = weighted_random_sample(peptide_taxa, 10000) - - print(f"Using {len(peptide_taxa)} sequences as input...") - - # Convert a JSON object into a Pandas DataFrame - # record_path Parameter is used to specify the path to the nested list or dictionary that you want to normalize - print("Normalizing peptides and converting to dataframe...") - unipept_frame = pd.DataFrame(list(peptide_taxa.items()), columns=['sequence', 'taxa']) - - scores = unipept_frame["sequence"].map(pep_scores) - scores.name = "score" - - psms = unipept_frame["sequence"].map(pep_psm_counts) - psms.name = "psms" - - # Merge psm_score and number of psms - unipept_frame = pd.concat( - [ - unipept_frame, - scores, - psms - ], - axis=1, - ) - - # Score the degeneracy of a taxa, i.e., - # how conserved a peptide sequence is between taxa. - # map all taxids in the list in the taxa column back to their taxid at species level (or the rank specified by the user) - # Right now, HigherTaxa is simply a copy of taxa. This step still needs to be optimized. 
- unipept_frame["HigherTaxa"] = unipept_frame.apply( - lambda row: row["taxa"], axis=1 - ) - - # Divide the number of PSMs of a peptide by the number of taxa the peptide is associated with, exponentiated by 3 - print("Started dividing the number of PSMS of a peptide by the number the peptide is associated with...") - unipept_frame["weight"] = unipept_frame["psms"].div( - [len(element) ** 3 for element in unipept_frame["HigherTaxa"]] - ) - - mask = [len(element) == 1 for element in unipept_frame["HigherTaxa"]] - unique_psm_taxa = set(i[0] for i in unipept_frame["HigherTaxa"][mask]) - unipept_frame = unipept_frame.explode("HigherTaxa", ignore_index=True) - - # Sum up the weights of a taxon and sort by weight - print("Started summing the weights of a taxon and sorting them by weight...") - unipept_frame["log_weight"] = np.log10(unipept_frame["weight"] + 1) - tax_id_weights = unipept_frame.groupby("HigherTaxa")["log_weight"].sum().reset_index() - - # Since large proteomes tend to have more detectable peptides, - # we adjust the weight by dividing by the size of the proteome i.e., - # the number of proteins that are associated with a taxon - tax_id_weights["scaled_weight"] = tax_id_weights[ - "log_weight" - ] # / (TaxIDWeights["proteome_size"]) ** N - - # Retrieves the specified taxonomic rank taxid in the lineage of each of the species-level taxids returned by - # Unipept for both the UnipeptFrame and the TaxIdWeightFrame - higher_unique_psm_taxids = unique_psm_taxa # set([GetLineageAtSpecifiedRank(i,TaxaRank) for i in UniquePSMTaxa]) - - # group the duplicate entries of higher up taxa and sum their weights - print("Started grouping duplicate entries of taxa situated higher up and sum their weights...") - higher_taxid_weights = ( - tax_id_weights.groupby("HigherTaxa")["scaled_weight"] - .sum() - .reset_index() - .sort_values(by=["scaled_weight"], ascending=False) - ) - # HigherTaxidWeights = TaxIDWeights - higher_taxid_weights["Unique"] = np.where( - 
higher_taxid_weights["HigherTaxa"].isin(higher_unique_psm_taxids), True, False - ) - - try: - higher_taxid_weights = higher_taxid_weights[ - higher_taxid_weights.HigherTaxa != 1869227 - ] - except: - pass - - unipept_frame = unipept_frame.drop('taxa', axis=1) - - if len(higher_taxid_weights.HigherTaxa) < 50: - return unipept_frame, higher_taxid_weights - else: - taxa_to_include = set(higher_taxid_weights["HigherTaxa"][0:max_taxa]) - taxa_to_include.update(higher_unique_psm_taxids) - - return ( - unipept_frame[unipept_frame["HigherTaxa"].isin(taxa_to_include)], - higher_taxid_weights - ) diff --git a/peptonizer/peptonizer/zero_lookahead_belief_propagation.py b/peptonizer/peptonizer/zero_lookahead_belief_propagation.py deleted file mode 100644 index b7de586..0000000 --- a/peptonizer/peptonizer/zero_lookahead_belief_propagation.py +++ /dev/null @@ -1,638 +0,0 @@ -# implementation of belief propagation on a peptide-protein graph -# __________________________________________________________________________________________ -from enum import Enum -from sys import getsizeof - -from .convolution_tree import * -from .factor_graph_generation import * -from .pqdict import pqdict -from typing import Dict, Any, List, Tuple, Iterator - - -class ZeroLookaheadProgressListener: - def graphs_updated( - self, - current_graph: int, - total_graphs: int - ): - """ - Called when the belief propagation algorithm is started on a new subgraph. - - Parameters - ---------- - current_graph: int - Which subgraph in the whole factor graph the belief propagation algorithm is started for. - total_graphs: int - The total amount of subgraphs (or communities) that are present in the whole factor graph. - """ - pass - - def max_residual_updated( - self, - max_residual: float, - tolerance: float - ): - """ - Called when a new, better max_residual score has been found by the belief propagation algorithm. 
- - Parameters - ---------- - max_residual: float - The new max_residual score that has been found (will thus be lower than the previous one) - tolerance: float - The minimum residual score required for the convergence of the belief propagation algorithm to be considered - complete. - """ - pass - - def iterations_updated( - self, - current_iteration: int, - total_iterations: int, - ): - """ - Called everytime the belief propagation algorithm has finished processing one or more complete iterations of the - message loop. - - Parameters - ---------- - current_iteration: int - The current iteration for which execution has just completed. - total_iterations: int - The maximum amount of iterations that will be performed before the belief propagation algorithm is stopped. - Note that the algorithm can be stopped earlier if convergence has already been achieved. - """ - pass - - -class PrintZeroLookaheadProgressListener(ZeroLookaheadProgressListener): - def graphs_updated( - self, - current_graph: int, - total_graphs: int - ): - print(f"Finished calibration of {current_graph} of {total_graphs}") - - def max_residual_updated( - self, - max_residual: float, - tolerance: float - ): - print(f"Improved maximum residual to {max_residual}. Convergence is met when lower than {tolerance}.") - - def iterations_updated( - self, - current_iteration: int, - total_iterations: int, - ): - print(f"Finished message loop iteration {current_iteration} / {total_iterations}.") - - -class Category(Enum): - peptide = 0 - factor = 1 - convolution_tree = 2 - taxon = 3 - - -class Messages: - """ - Class holding all messages and beliefs. 
- Functions execute loopy residual belief propagation with zero look ahead - """ - - # class that holds the messages of iteration t and iteration t+1 as dictionaries - def __init__(self, ct_graph_in: CTFactorGraph): - self.max_val: Optional[Tuple[int, int]] = None - self.priorities: pqdict = pqdict({}, reverse=True) - self.category: Category = Category[ct_graph_in.category] - - amount_of_nodes = ct_graph_in.number_of_nodes() - - # Maps a node identifier (as specified by the CTGraph) onto a unique integer. - self.node_descriptions: Dict[str, int] = {} - # Reverse mapping of the dict above - self.node_id_to_description: List[str] = [] - # Maps a node ID onto its category - self.categories: List[Category] = [] - # Maps a node ID onto a list of its neighbouring node IDs - self.neighbours: List[List[int]] = [] - # Keeps track of the number of parents of a node - self.number_of_parents: List[int] = [0 for _ in range(amount_of_nodes)] - # Maps an edge as a tuple of node ids to an id - self.edge_ids: Dict[Tuple[int, int], int] = {} - # Reverse mapping of the dict above - self.edges: List[Tuple[int, int]] = [] - - # Keeps track of residuals for duos of edges, indexed by [edge index][neighbour index of edge's end node] - self.total_residuals: List[List[float]] = [] - - # Maps a node ID onto its initial belief value - self.initial_beliefs: List[npt.NDArray[np.float64]] = [] - # Maps a node ID onto its current belief value - self.current_beliefs: List[npt.NDArray[np.float64]] = [] - - # Incoming messages for each node - self.msg_in: List[List[npt.NDArray[np.float64]]] = [] - self.msg_in_new: List[List[npt.NDArray[np.float64]]] = [] - self.msg_in_log: List[List[npt.NDArray[np.float64]]] = [] - - nodes: Iterator[Tuple[Any, Dict[str, Any]]] = ct_graph_in.nodes(data=True) - - for (node_id, node) in enumerate(nodes): - # Each node should occur exactly once - assert node[0] not in self.node_descriptions - - self.node_descriptions[node[0]] = node_id - 
self.node_id_to_description.append(node[0]) - self.categories.append(Category[node[1]["category"]]) - - # Only convolution trees have a number of parents property assigned to them. - if node[1]["category"] == "convolution_tree": - self.number_of_parents[node_id] = node[1]["NumberOfParents"] - - if node[1]["category"] == "factor": - self.initial_beliefs.append(node[1]["InitialBelief"].factor.array) - elif node[1]["category"] == "peptide" or node[1]["category"] == ct_graph_in.category: - self.initial_beliefs.append(np.array([node[1]["InitialBelief_0"], node[1]["InitialBelief_1"]])) - else: - # this entry will never be used as the convolution trees do not hold beliefs - self.initial_beliefs.append(np.ones(4)) - - # We start out with current beliefs that are identical to the initial beliefs - self.current_beliefs = self.initial_beliefs.copy() - - # Now that all nodes have been processed, we need to replace all the neighbours of a node by their node IDs - for (node_id, node) in enumerate(ct_graph_in.nodes(data=True)): - self.neighbours.append([self.node_descriptions[n] for n in ct_graph_in.neighbors(node[0])]) - - # Incoming messages for each node, initialize with correct dimensions - self.msg_in = [[np.zeros(0) for _ in range(len(self.neighbours[i]))] for i in range(amount_of_nodes)] - self.msg_in_new = [[np.zeros(0) for _ in range(len(self.neighbours[i]))] for i in range(amount_of_nodes)] - self.msg_in_log = [[np.zeros(0) for _ in range(len(self.neighbours[i]))] for i in range(amount_of_nodes)] - - # Now, also replace the edge descriptions by the corresponding node IDs - edge_id: int = 0 - for start_node, end_node, data in ct_graph_in.edges(data=True): - start_node_id = self.node_descriptions[start_node] - end_node_id = self.node_descriptions[end_node] - - rev_edge_id = edge_id + 1 - self.edge_ids[(start_node_id, end_node_id)] = edge_id - self.edges.append((start_node_id, end_node_id)) - self.edge_ids[(end_node_id, start_node_id)] = rev_edge_id - 
self.edges.append((end_node_id, start_node_id)) - - start_neighbour_index: int = self.neighbours[end_node_id].index(start_node_id) - if "MessageLength" in data: - self.msg_in[end_node_id][start_neighbour_index] = np.ones(data["MessageLength"]) - self.msg_in_new[end_node_id][start_neighbour_index] = np.ones(data["MessageLength"]) - else: - self.msg_in[end_node_id][start_neighbour_index] = np.array([0.5, 0.5]) - self.msg_in_new[end_node_id][start_neighbour_index] = np.array([0, 0]) - - end_neighbour_index = self.neighbours[start_node_id].index(end_node_id) - self.msg_in[start_node_id][end_neighbour_index] = self.msg_in[end_node_id][start_neighbour_index] - self.msg_in_new[start_node_id][end_neighbour_index] = self.msg_in_new[end_node_id][start_neighbour_index] - - edge_id += 2 - - self.total_residuals = [[0 for _ in self.neighbours[end_node]] for (_, end_node) in self.edges] - self.msg_in_log = [msg_in.copy() for msg_in in self.msg_in_new] - - - def print_mem_usage(self): - print("max_val: " + str(getsizeof(self.max_val)) + " bytes") - priorities_size = len(self.priorities) * (2 * getsizeof(int())) - print("priorities (app): " + str(priorities_size) + " bytes") - print("category: " + str(getsizeof(self.category)) + " bytes") - - node_descriptions = len(self.node_descriptions) * (getsizeof(int()) + getsizeof("factor")) - print("node_descriptions: " + str(node_descriptions) + " bytes") - node_id_to_description = len(self.node_id_to_description) * (getsizeof("factor")) - print("node_id_to_description: " + str(node_id_to_description) + " bytes") - - categories = len(self.categories) * (getsizeof(Category)) - print("categories: " + str(categories) + " bytes") - - total_neighbours = sum([len(n) for n in self.neighbours]) - neighbours = total_neighbours * getsizeof(int()) + len(self.neighbours) * (getsizeof([])) - print("neighbours: " + str(neighbours) + " bytes") - - number_of_parents = len(self.number_of_parents) * (getsizeof(int())) - print("number_of_parents: " + 
str(number_of_parents) + " bytes") - - edge_ids = len(self.edge_ids) * (getsizeof((int(), int())) + getsizeof(int())) - print("edge_ids: " + str(edge_ids) + " bytes") - edges = len(self.edges) * getsizeof((int(), int())) - print("edges: " + str(edges) + " bytes") - - total_edge_duos = sum([len(self.neighbours[end_node]) for (_, end_node) in self.edges]) - total_residuals = total_edge_duos * getsizeof(float()) + len(self.total_residuals) * getsizeof([]) - print("total_residuals: " + str(total_residuals) + " bytes") - - total_belief_size = sum([len(n) for n in self.initial_beliefs]) - initial_beliefs = len(self.initial_beliefs) * getsizeof(np.array([])) + total_belief_size * 8 - print("initial_beliefs / current_beliefs: " + str(initial_beliefs) + " bytes") - - # Incoming messages for each node - total_msg_length = sum([sum([len(m) for m in n]) for n in self.msg_in]) - total_msg_lists = sum([len(n) for n in self.msg_in]) + len(self.msg_in) - msg_in = total_msg_length * 8 + total_msg_lists * getsizeof([]) - print("msg_in / msg_in_new / msg_in_log: " + str(msg_in) + " bytes") - - def compute_out_message_variable(self, node_out: int, node_in: int) -> npt.NDArray[np.float64]: - node_in_neighbour_index = self.neighbours[node_out].index(node_in) - incoming_messages: List[npt.NDArray[np.float64]] = self.msg_in[node_out].copy() - incoming_messages.pop(node_in_neighbour_index) - node_belief: npt.NDArray[np.float64] = self.current_beliefs[node_out] - - if not incoming_messages: - node_out_neighbour_id = self.neighbours[node_in].index(node_out) - return node_belief if any(node_belief == self.initial_beliefs[node_out]) else self.msg_in[node_in][node_out_neighbour_id] - - # need for logs to prevent underflow in very large multiplications - incoming_messages_array = np.asarray(np.log(incoming_messages)).reshape( - len(incoming_messages), 2 - ) - - out_message_log = array_utils.log_normalize( - np.log(node_belief) + np.sum(incoming_messages_array, axis=0) - ) - - if not 
np.all(out_message_log): - out_message_log[out_message_log == 0] = 1e-30 - - return out_message_log - - def compute_out_message_factor(self, node_out: int, node_in: int) -> npt.NDArray[np.float64]: - node_in_neighbour_index = self.neighbours[node_out].index(node_in) - incoming_messages: List[npt.NDArray[np.float64]] = self.msg_in[node_out].copy() - incoming_messages.pop(node_in_neighbour_index) - node_belief = self.current_beliefs[node_out] - - if self.categories[node_in] == Category.convolution_tree: - # handles empty & messages with only one value - incoming_messages.append(np.asarray([1.0, 1.0])) - in_messages_array: npt.NDArray[np.float64] = np.asarray(incoming_messages).reshape( - len(incoming_messages), 2 - ) - out_messages = array_utils.normalize( - np.multiply(node_belief, np.prod(in_messages_array, axis=0)) - ) # lognormalize(np.add(np.log(NodeBelief),[np.sum(IncomingMessages[:,0]),np.sum(IncomingMessages[:,1])]))# - - return np.sum(out_messages, axis=1) - else: - if len(incoming_messages[0]) > 2: - incoming_messages_log = np.log( - np.asarray(incoming_messages).reshape( - len(incoming_messages[0]), 1 - ) - ) - # catch warning for log(0) - - log_belief = np.log(node_belief) - out_messages_log = array_utils.log_normalize(log_belief + incoming_messages_log) - if not np.all(out_messages_log): - out_messages_log[out_messages_log == 0] = 1e-30 - - return np.asarray(np.sum(out_messages_log[:2], axis=1)) - else: - incoming_messages.append(np.asarray([1.0, 1.0])) - incoming_messages_array = np.asarray(incoming_messages).reshape( - len(incoming_messages), 2 - ) - - out_messages = array_utils.normalize( - node_belief * np.prod(incoming_messages_array, axis=0) - ) - return np.sum(out_messages, axis=1) - - def compute_out_messages_ct_tree(self, node: int): - prot_prob_list: List[npt.NDArray[np.float64]] = [] - old_prot_prob_list: List[npt.NDArray[np.float64]] = [] - shared_likelihoods: npt.NDArray[np.float64] = np.ones(self.number_of_parents[node] + 1) - 
old_shared_likelihoods: npt.NDArray[np.float64] = np.empty(1) - peptides: List[int] = [] - prot_list: List[int] = [] - - for node_in_neighbour_index, node_in in enumerate(self.neighbours[node]): - if self.categories[node_in] != Category.factor: - prot_prob_list.append(self.msg_in[node][node_in_neighbour_index]) - old_prot_prob_list.append(self.msg_in_log[node][node_in_neighbour_index]) - prot_list.append(node_in) - else: - peptides.append(node_in) - try: - shared_likelihoods = np.multiply( - array_utils.avoid_underflow(shared_likelihoods), self.msg_in[node][node_in_neighbour_index] - ) - except: - print(shared_likelihoods, self.msg_in[node][node_in_neighbour_index]) - old_shared_likelihoods = np.multiply( - array_utils.avoid_underflow(shared_likelihoods), self.msg_in_log[node][node_in_neighbour_index] - ) - - if ( - not np.array_equal(old_shared_likelihoods, shared_likelihoods) and - any( - prot_prob_list[i][0] != old_prot_prob_list[i][0] - for i in range(len(prot_prob_list)) - ) - ): - # only update when the shared likelihoods or at least one of the protein messages has changed - ct = ConvolutionTree(shared_likelihoods, prot_prob_list) - - for protein_id, protein in enumerate(prot_list): - node_neighbour_index = self.neighbours[protein].index(node) - self.msg_in_new[protein][node_neighbour_index] = ct.message_to_variable(protein_id) - msg = self.msg_in_new[protein][node_neighbour_index] - if not np.all(msg): - self.msg_in_new[protein][node_neighbour_index][ - msg == 0 - ] = 1e-30 - - for pep in peptides: - node_neighbour_id = self.neighbours[pep].index(node) - self.msg_in_new[pep][node_neighbour_id] = ct.message_to_shared_likelihood() - msg = self.msg_in_new[pep][node_neighbour_id] - if not np.all(msg): - self.msg_in_new[pep][node_neighbour_id][msg < 1e-30] = 1e-30 - - else: - for protein in prot_list: - node_neighbour_id = self.neighbours[protein].index(node) - self.msg_in_new[protein][node_neighbour_id] = self.msg_in[protein][node_neighbour_id] - - for pep in 
peptides: - node_neighbour_id = self.neighbours[pep].index(node) - self.msg_in_new[pep][node_neighbour_id] = self.msg_in[pep][node_neighbour_id] - - def compute_infinity_norm_residual(self, node_in: int, node_out: int) -> float: - node_in_neighbour_id = self.neighbours[node_out].index(node_in) - msg1 = self.msg_in[node_out][node_in_neighbour_id] - msg2 = self.msg_in_log[node_out][node_in_neighbour_id] - - if len(msg2) != len(msg1): - msg2 = [1] * len(msg1) - - pos = 0 - for i in msg1: - if i < 1e-30: - msg1[pos] = 1e-30 - pos += 1 - - return np.max(np.abs(np.log(np.divide(msg1, msg2)))) - # approximate residual with zero look-ahead - - def compute_total_residuals(self, edge_id: int, current_residual: float): - start_node, end_node = self.edges[edge_id] - end_node_neighbor_id: int = self.neighbours[start_node].index(end_node) - for start_neighbour in self.neighbours[start_node]: - if start_neighbour != end_node: - self.total_residuals[self.edge_ids[(start_neighbour, start_node)]][end_node_neighbor_id] = 0 - - for i, end_neighbour in enumerate(self.neighbours[end_node]): - if end_neighbour != start_node: - self.total_residuals[edge_id][i] += current_residual - - def compute_priority(self, edge_id: int): - start_node, end_node = self.edges[edge_id] - self.priorities[edge_id] = 0 - - for i, end_neighbor in enumerate(self.neighbours[end_node]): - if end_neighbor != start_node: - neighbor_edge: int = self.edge_ids[(end_node, end_neighbor)] - self.priorities[neighbor_edge] = np.sum( - [ - self.total_residuals[self.edge_ids[(sum_run, end_node)]][i] - for sum_run in self.neighbours[end_node] - if sum_run != end_neighbor - ] - ) - - # computes new message for a given edge (startname, endname) in the direction startname -> endname - def single_edge_direction_update(self, start_node: int, end_node: int, checked_cts: set[int]): - start_node_neighbour_id = self.neighbours[end_node].index(start_node) - if ( - self.categories[start_node] == self.category - or 
self.categories[start_node] == Category.peptide - ): - self.msg_in_new[end_node][start_node_neighbour_id] = self.compute_out_message_variable( - start_node, end_node - ) - - if self.categories[start_node] == Category.convolution_tree and start_node not in checked_cts: - self.compute_out_messages_ct_tree(start_node) - checked_cts.add(start_node) - - if self.categories[start_node] == Category.factor: - self.msg_in_new[end_node][start_node_neighbour_id] = self.compute_out_message_factor( - start_node, end_node - ) - - # compute updated messages for all edges - def compute_update(self, local_loops: bool = False): - checked_cts: set[int] = set() # keeps track of which CT has already been active - - if local_loops and self.max_val: - start_node = self.max_val[1] - for end_node in self.neighbours[self.max_val[1]]: - self.single_edge_direction_update(start_node, end_node, checked_cts) - - else: - # update all edges - for start_node, neighbours in enumerate(self.neighbours): - for end_node in neighbours: - self.single_edge_direction_update(start_node, end_node, checked_cts) - - def get_priority_message(self) -> int: - self.max_val = self.priorities.top() - return self.max_val - - def zero_look_ahead_loopy_loop( - self, - max_loops: int, - tolerance: float, - progress_listener: ZeroLookaheadProgressListener - ) -> Dict[str, npt.NDArray[np.float64]]: - """ - Run the zero-look-ahead belief propagation algorithm. - :param max_loops: int, maximum number of iterations in case of non-convergence - :param tolerance: float, tolerance for convergence check - :param progress_listener: ZeroLookaheadProgressListener, progress listener that waits for updates from the running - belief propagation algorithm. 
- """ - - max_residual = 100 - - # first, do 5 loops where I update all messages - progress_listener.iterations_updated(0, max_loops) - for k in range(0, 5): - self.compute_update() - self.msg_in_log = [msg_in.copy() for msg_in in self.msg_in] - self.msg_in = [msg_in.copy() for msg_in in self.msg_in_new] - - progress_listener.iterations_updated(k, max_loops) - - # compute all residuals after 5 runs once (= initialize the residual/priorities vectors) - residuals = [(edge_id, self.compute_infinity_norm_residual(*edge)) for edge_id, edge in enumerate(self.edges)] - - # set the priority vector once with copy of the previously calculated residuals - self.priorities = pqdict(residuals, reverse=True) - - k = 5 - - # keep track of the nodes of which the incoming messages have changed - prev_changed = [i for i in range(len(self.msg_in))] - - while k < max_loops and max_residual > tolerance: - # actual zero-look-ahead-BP part - priority_message_edge_id = self.get_priority_message() - max_residual = self.priorities[priority_message_edge_id] - (start_node, end_node) = self.edges[priority_message_edge_id] - - self.single_edge_direction_update(start_node, end_node, set()) - - # TODO: should this not be after updating messages? (uses msg_in and msg_log) - priority_residual = self.compute_infinity_norm_residual(start_node, end_node) - for node in prev_changed: - self.msg_in_log[node] = self.msg_in[node].copy() - prev_changed = [] - # if the start node is a convolution tree, all the incoming messages of the neighbours can be changed. 
- if self.categories[start_node] == Category.convolution_tree: - for node in self.neighbours[start_node]: - prev_changed.append(node) - for i, neighbour in enumerate(self.neighbours[node]): - self.msg_in[node][i] = self.msg_in_new[node][i] - else: - start_node_neighbour_id = self.neighbours[end_node].index(start_node) - self.msg_in[end_node][start_node_neighbour_id] = self.msg_in_new[end_node][start_node_neighbour_id] - prev_changed.append(end_node) - - self.compute_total_residuals( - priority_message_edge_id, priority_residual - ) - self.compute_priority(priority_message_edge_id) - - # Try not to overwhelm the progress listener with too many messages. That's why only every 5th iteration - # will be reported. - if k % 5 == 0: - progress_listener.iterations_updated(k, max_loops) - - k += 1 - - # marginalize once the model has converged - for (node_id, node_category) in enumerate(self.categories): - if ( - node_category == self.category - or node_category == Category.peptide - ): - incoming_messages: List[npt.NDArray[np.float64]] = [] - - for incoming_message in self.msg_in[node_id]: - incoming_messages.append( - incoming_message - ) - - # log to avoid overflow - incoming_messages_array = np.log(incoming_messages).reshape( - len(incoming_messages), 2 - ) - logged_variable_marginal = array_utils.log_normalize( - np.log(self.initial_beliefs[node_id]) + np.sum(incoming_messages_array, axis=0) - ) - - self.current_beliefs[node_id] = logged_variable_marginal - - # Translate the node_ids back to their original sequences and return the current beliefs as a dictionary - output_beliefs: Dict[str, npt.NDArray[np.float64]] = {} - for (node_id, beliefs) in enumerate(self.current_beliefs): - output_beliefs[self.node_id_to_description[node_id]] = beliefs - return output_beliefs - - -# calibration through message passing of all subgraphs in the List of factor graphs -def calibrate_all_subgraphs( - list_of_ct_factor_graphs: List[CTFactorGraph], - max_iterations: int, - tolerance: 
float, - progress_listener: ZeroLookaheadProgressListener -) -> Tuple[Dict[str, npt.NDArray[np.float64]], Dict[str, str]]: - """ - Performs bayesian inference through loopy belief propagation, returns dictionary {variable:posterior_probability} - :param list_of_ct_factor_graphs: list, contains FactorGraph objects on which inference can be performed - :param max_iterations: int, max number of iterations in case of non-convergence - :param tolerance: float, error tolerance between messages for convergence criterion - :param progress_listener: ZeroLookaheadProgressListener, progress listener that waits for updates from the running - belief propagation algorithm. - """ - - results_dict: Dict[str, npt.NDArray[np.float64]] = {} - node_category_dict: Dict[str, str] = {} - - progress_listener.graphs_updated(0, len(list_of_ct_factor_graphs)) - for (idx, graph) in enumerate(list_of_ct_factor_graphs): - if graph.number_of_nodes() > 2: - node_category_dict.update(dict(graph.nodes(data="category"))) - - initialized_message_object = Messages(graph) - current_beliefs = initialized_message_object.zero_look_ahead_loopy_loop( - max_iterations, - tolerance, - progress_listener - ) - - results_dict.update(current_beliefs) - progress_listener.graphs_updated(idx + 1, len(list_of_ct_factor_graphs)) - return results_dict, node_category_dict - - -def convert_results_to_csv(results_dict: Dict[str, npt.NDArray[np.float64]], node_dict: Dict[str, str]): - """ - Save Loopy Belief Propagation results to .csv file - :param results_dict: dict, {variable:posterior_probability} - :param node_dict: dict, dictionary of nodes that were in the factor graph and their attributes, to include the node category in the results - """ - - full_results_dict = { - key: [results_dict[key][1], node_dict[key]] for key in results_dict.keys() - } - - return pd.DataFrame.from_dict(data=full_results_dict, orient="index").to_csv(header=False) - - -def run_belief_propagation( - graphml_content: str, - alpha: float, - 
beta: float, - regularized: bool, - prior: float, - max_iter: int = 10000, - tol: float = 0.006, - progress_listener: ZeroLookaheadProgressListener = PrintZeroLookaheadProgressListener() - ): - """ - Runs the belief propagation algorithm on a graph that's represented by the string in graphml_content with the - tuning parameters further specified to this function. This function returns a string that contains the result of - the belief propagation algorithm, represented as a CSV (and can thus directly be written to a CSV-file, if desired). - """ - - ct_factor_graph = CTFactorGraph(graphml_content) - ct_factor_graph.fill_in_factors(alpha, beta, regularized) - ct_factor_graph.fill_in_priors(prior) - ct_factor_graph.add_ct_nodes() - - ct_factor_graphs = [ - separate_subgraphs(ct_factor_graph, sorted(filter_nodes)) - for filter_nodes in nx.connected_components(ct_factor_graph) - ] - - results_dict, node_types = calibrate_all_subgraphs( - ct_factor_graphs, - max_iter, - tol, - progress_listener - ) - - return convert_results_to_csv(results_dict, node_types) diff --git a/peptonizer/pyrightconfig.json b/peptonizer/pyrightconfig.json deleted file mode 100644 index 89027ad..0000000 --- a/peptonizer/pyrightconfig.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "include": [ - "implementation/array_utils.py", - "implementation/convolution_tree.py", - "implementation/zero_lookahead_belief_propagation.py" - ], - "strict": [ - "implementation/array_utils.py", - "implementation/convolution_tree.py", - "implementation/zero_lookahead_belief_propagation.py" - ], - "reportMissingTypeStubs": true, - "reportMissingImports": true, - "typeRoots": [ - "./typings" - ] -} \ No newline at end of file diff --git a/peptonizer/setup.py b/peptonizer/setup.py deleted file mode 100644 index 8b2c10f..0000000 --- a/peptonizer/setup.py +++ /dev/null @@ -1,23 +0,0 @@ -from setuptools import setup, find_packages - -setup( - name="peptonizer", - version="0.1", - packages=find_packages(), - install_requires=[ - 
"numpy", - "scipy", - "networkx", - "pandas" - ], - author="Tanja Holstein", - author_email="tanja.holstein@ugent.be", - description="The Peptonizer allows you to easily find out which taxa are most likely present in a metaproteomics sample of interest.", - url="https://github.com/compomics/Peptonizer2000", # Replace with your GitHub URL - classifiers=[ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", - ], - python_requires='>=3.6', -) diff --git a/peptonizer/typings/scipy/__init__.pyi b/peptonizer/typings/scipy/__init__.pyi deleted file mode 100644 index e69de29..0000000 diff --git a/peptonizer/typings/scipy/signal.pyi b/peptonizer/typings/scipy/signal.pyi deleted file mode 100644 index 4f5c97b..0000000 --- a/peptonizer/typings/scipy/signal.pyi +++ /dev/null @@ -1,11 +0,0 @@ -from typing import Union, Optional, Sequence -import numpy as np -import numpy.typing as npt - -def fftconvolve( - in1: npt.ArrayLike, - in2: npt.ArrayLike, - mode: str = 'full', - axes: Optional[Union[int, Sequence[int]]] = None -) -> npt.NDArray[np.float64]: - ... \ No newline at end of file diff --git a/peptonizer/typings/scipy/special.pyi b/peptonizer/typings/scipy/special.pyi deleted file mode 100644 index 42d2d4e..0000000 --- a/peptonizer/typings/scipy/special.pyi +++ /dev/null @@ -1,12 +0,0 @@ -from typing import Optional, Tuple, Union -import numpy as np -import numpy.typing as npt - -def logsumexp( - a: npt.ArrayLike, - axis: Optional[Union[int, Tuple[int, ...]]] = None, - b: Optional[npt.ArrayLike] = None, - keepdims: bool = False, - return_sign: bool = False -) -> Union[npt.NDArray[np.float64], Tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]]: - ... 
diff --git a/peptonizer_rust/Cargo.toml b/peptonizer_rust/Cargo.toml new file mode 100644 index 0000000..7fe8504 --- /dev/null +++ b/peptonizer_rust/Cargo.toml @@ -0,0 +1,37 @@ +[package] +name = "peptonizer_rust" +version = "0.1.0" +publish = false +edition = "2024" + +[lib] +name = "peptonizer_rust" +crate-type = ["cdylib"] + +[dependencies] +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +futures = "0.3" +csv = "1.1" +petgraph = { version = "0.6", features = ["serde-1"] } +minidom = "0.15" +rustfft = { version = "6.2.0", features = ["wasm_simd"] } +console_error_panic_hook = "0.1" +rand = "0.8" +priority-queue = "2.7.0" +ordered-float = "4.2" +fast-math = "0.1" +nori_inference = "1.0.1" + +[target.'cfg(target_arch = "wasm32")'.dependencies] +wasm-bindgen = "0.2" +wasm-bindgen-futures = "0.4" +web-sys = { version = "0.3", features = ['Headers', 'Request', 'RequestInit', 'RequestMode', 'Response', 'Window', 'XmlHttpRequest'] } +js-sys = "0.3.76" +getrandom = { version = "0.2", features = ["js"] } + +[target.'cfg(not(target_arch = "wasm32"))'.dependencies] +pyo3 = { version = "0.22", features = ["extension-module"] } +reqwest = { version = "0.11", default-features = false, features = ["json", "rustls-tls"] } +tokio = { version = "1", features = ["full"] } + diff --git a/peptonizer_rust/build.rs b/peptonizer_rust/build.rs new file mode 100644 index 0000000..c6e4844 --- /dev/null +++ b/peptonizer_rust/build.rs @@ -0,0 +1,25 @@ +use std::fs::File; +use std::io::Write; +use std::path::Path; + +const N: usize = 1024; +const MIN_X: f64 = 1e-10; +const MAX_X: f64 = 1.0; + +fn main() { + let step = (MAX_X - MIN_X) / (N as f64 - 1.0); + + let out_dir = std::env::var("OUT_DIR").unwrap(); + let path = Path::new(&out_dir).join("log_table.rs"); + let mut file = File::create(&path).unwrap(); + + writeln!(file, "pub const LOG_TABLE: [f64; {N}] = [").unwrap(); + for i in 0..N { + let x = MIN_X + i as f64 * step; + writeln!(file, " {:.16},", 
x.ln()).unwrap(); + } + writeln!(file, "];").unwrap(); + + // Tell Cargo to rerun build.rs if this file changes (optional) + println!("cargo:rerun-if-changed=build.rs"); +} \ No newline at end of file diff --git a/peptonizer_rust/dist/peptonizer_rust-0.1.0-cp312-cp312-macosx_10_12_x86_64.whl b/peptonizer_rust/dist/peptonizer_rust-0.1.0-cp312-cp312-macosx_10_12_x86_64.whl new file mode 100644 index 0000000..0ac63aa Binary files /dev/null and b/peptonizer_rust/dist/peptonizer_rust-0.1.0-cp312-cp312-macosx_10_12_x86_64.whl differ diff --git a/peptonizer_rust/dist/peptonizer_rust-0.1.0-cp312-cp312-macosx_11_0_arm64.whl b/peptonizer_rust/dist/peptonizer_rust-0.1.0-cp312-cp312-macosx_11_0_arm64.whl new file mode 100644 index 0000000..f5d98bb Binary files /dev/null and b/peptonizer_rust/dist/peptonizer_rust-0.1.0-cp312-cp312-macosx_11_0_arm64.whl differ diff --git a/peptonizer_rust/dist/peptonizer_rust-0.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl b/peptonizer_rust/dist/peptonizer_rust-0.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl new file mode 100644 index 0000000..ed099ad Binary files /dev/null and b/peptonizer_rust/dist/peptonizer_rust-0.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl differ diff --git a/peptonizer_rust/dist/peptonizer_rust-0.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl b/peptonizer_rust/dist/peptonizer_rust-0.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl new file mode 100644 index 0000000..5a7fb7d Binary files /dev/null and b/peptonizer_rust/dist/peptonizer_rust-0.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl differ diff --git a/peptonizer_rust/dist/peptonizer_rust-0.1.0-cp312-cp312-win_amd64.whl b/peptonizer_rust/dist/peptonizer_rust-0.1.0-cp312-cp312-win_amd64.whl new file mode 100644 index 0000000..23a0022 Binary files /dev/null and b/peptonizer_rust/dist/peptonizer_rust-0.1.0-cp312-cp312-win_amd64.whl differ diff --git 
a/peptonizer_rust/src/analyse_grid_search.rs b/peptonizer_rust/src/analyse_grid_search.rs new file mode 100644 index 0000000..6112b22 --- /dev/null +++ b/peptonizer_rust/src/analyse_grid_search.rs @@ -0,0 +1,170 @@ +use std::collections::{HashSet, HashMap}; +use crate::taxa_clustering::{Taxon, parse_taxon_csv}; + + +/// Computes a "goodness" score for clustering results by combining +/// ranking similarity (via rank-biased overlap) and diversity (via entropy). +/// +/// # Arguments +/// * `clustered_taxa_weights_csv` - CSV string containing clustered taxa weights. +/// * `peptonizer_results` - JSON string containing taxa scores produced by Peptonizer. +/// +/// # Returns +/// A `Result>` containing the computed goodness score, +/// or an error if parsing fails. +/// +/// # Errors +/// This function may return an error if the input CSV or JSON cannot be parsed. +pub fn compute_goodness( + clustered_taxa_weights_csv: String, + peptonizer_results: String +) -> Result> { + + let taxid_weights: Vec = parse_taxon_csv(clustered_taxa_weights_csv)?; + let higher_taxa: Vec = taxid_weights.iter().map(|t| t.higher_taxa).collect(); + + let taxa_scores: HashMap = serde_json::from_str(&peptonizer_results)?; + let mut taxa_scores: Vec<(&String, &f64)> = taxa_scores.iter().collect(); + + taxa_scores.sort_by(|a, b| b.1.partial_cmp(a.1).expect("Partial compare returned None")); // ascending order + + let sorted_ids: Vec = taxa_scores.iter() + .map(|(k, _)| (*k).clone().parse::()) + .collect::, _>>()?; + let sorted_scores: Vec = taxa_scores.iter().map(|(_, v)| **v).collect::>(); + + let entropy = entropy(&sorted_scores); + let rbo = rbo(&higher_taxa, &sorted_ids); + + Ok(rbo / entropy.powi(2)) +} + + +/// Computes the Rank-Biased Overlap (RBO) between two ranked lists of taxon IDs. +/// RBO measures the agreement between two ranked lists, emphasizing higher ranks. +/// +/// # Arguments +/// * `list1` - First ranked list of taxon IDs. +/// * `list2` - Second ranked list of taxon IDs. 
+/// +/// # Returns +/// A value between 0.0 and 1.0 representing the similarity of the two ranked lists. +fn rbo(list1: &[usize], list2: &[usize]) -> f64 { + let k = list1.len().min(list2.len()); + let mut sum = 0.0; + + let mut seen1 = HashSet::new(); + let mut seen2 = HashSet::new(); + + for d in 1..=k { + seen1.insert(list1[d - 1]); + seen2.insert(list2[d - 1]); + + let overlap = seen1.intersection(&seen2).count() as f64; + let agreement = overlap / d as f64; + + sum += agreement; + } + + sum / k as f64 +} + + +/// Calculates the Shannon entropy of a set of values. +/// Entropy measures the diversity or unpredictability of a distribution. +/// +/// # Arguments +/// * `values` - A slice of floating-point values representing weights or probabilities. +/// +/// # Returns +/// The entropy as a floating-point value. Returns 0.0 if all values sum to zero. +fn entropy(values: &[f64]) -> f64 { + let sum: f64 = values.iter().sum(); + + if sum == 0.0 { + return 0.0; + } + + values.iter() + .map(|&v| { + let p = v / sum; + if p > 0.0 { + -p * p.log2() + } else { + 0.0 + } + }) + .sum() +} + + +#[cfg(test)] +mod tests { + use super::*; + use crate::taxa_clustering::generate_taxa_cluster_csv; + + #[test] + fn test_entropy_uniform_distribution() { + let values = vec![1.0, 1.0, 1.0, 1.0]; + let result = entropy(&values); + // Uniform distribution of 4 elements has entropy = log2(4) = 2.0 + assert!((result - 2.0).abs() < 1e-6); + } + + #[test] + fn test_entropy_all_zero() { + let values = vec![0.0, 0.0, 0.0]; + let result = entropy(&values); + assert_eq!(result, 0.0); + } + + #[test] + fn test_rbo_perfect_match() { + let list1 = vec![1, 2, 3, 4]; + let list2 = vec![1, 2, 3, 4]; + let result = rbo(&list1, &list2); + assert!((result - 1.0).abs() < 1e-6); + } + + #[test] + fn test_rbo_no_overlap() { + let list1 = vec![1, 2, 3]; + let list2 = vec![4, 5, 6]; + let result = rbo(&list1, &list2); + assert_eq!(result, 0.0); + } + + #[test] + fn test_compute_goodness_valid_inputs() { + 
// Prepare a small CSV with two taxa + let taxa = vec![ + Taxon { + id: 0, + higher_taxa: 1, + scaled_weight: 0.5, + unique: true, + cluster_members: vec![1, 2], + }, + Taxon { + id: 1, + higher_taxa: 2, + scaled_weight: 0.8, + unique: false, + cluster_members: vec![2, 3], + }, + ]; + let csv = generate_taxa_cluster_csv(taxa).unwrap(); + + // JSON scores + let json_scores = serde_json::json!({ + "1": 0.9, + "2": 0.8 + }).to_string(); + + let result = compute_goodness(csv, json_scores); + assert!(result.is_ok()); + let score = result.unwrap(); + assert!(score.is_finite()); + assert!(score > 0.0); + } +} \ No newline at end of file diff --git a/peptonizer_rust/src/clean_csv.rs b/peptonizer_rust/src/clean_csv.rs new file mode 100644 index 0000000..0e166a0 --- /dev/null +++ b/peptonizer_rust/src/clean_csv.rs @@ -0,0 +1,94 @@ +use csv::ReaderBuilder; +use serde::Deserialize; +use crate::unipept_communicator::get_names_for_taxa; +use crate::http_client::HttpResult; +use std::collections::HashMap; + +#[derive(Deserialize)] +struct Row { + id: String, + score: String, + row_type: String, +} + +/// Read a CSV-file that was produced by the PepGM algorithm and use it to +/// produce a new CSV-file that only contains the taxon-related information +/// and scores. The string produced by this function can be written directly +/// to a valid CSV-file and contains three columns: `taxon_name`, `taxon_id`, +/// and `score`. +/// +/// # Arguments +/// * `csv_content` - A CSV-file (as a string) that has been generated by running the PepGM algorithm. +/// +/// # Returns +/// A `String` containing CSV rows with the columns: `taxon_name,taxon_id,score`. 
+pub async fn clean_csv(csv_content: String) -> HttpResult { + // Parse CSV input (without headers) + let mut rdr = ReaderBuilder::new() + .has_headers(false) + .from_reader(csv_content.as_bytes()); + + let mut tax_ids: Vec<(usize, f32)> = Vec::new(); + + for result in rdr.deserialize() { + let record: Row = result?; + if record.row_type == "taxon" { + if let (Ok(id), Ok(score)) = (record.id.parse::(), record.score.parse::()) { + tax_ids.push((id, score)); + } + } + } + + // Sort by score ascending + tax_ids.sort_by(|a, b| a.1.partial_cmp(&b.1).expect("Partial compare returned None")); + + // Collect all taxon IDs for name lookup + let ids: Vec = tax_ids.iter().map(|(id, _)| *id).collect(); + let name_mapping: HashMap = get_names_for_taxa(&ids) + .await + .map_err(|e| format!("Failed to retrieve taxon names: {e}"))?; + + // Build CSV output + let mut lines: Vec = Vec::new(); + lines.push("taxon_name,id,score".to_string()); + + for (id, score) in tax_ids { + if let Some(name) = name_mapping.get(&id) { + lines.push(format!("{name},{id},{score}")); + } + } + + Ok(lines.join("\n")) +} + + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_clean_csv_basic() { + // CSV with two taxa and one non-taxon row + let csv_input = "1,0.5,taxon\n2,0.2,taxon\n3,0.9,other\n".to_string(); + + let result = clean_csv(csv_input).await.unwrap(); + println!("{}", result); + let expected_lines: Vec<&str> = vec![ + "taxon_name,id,score", + "Bacteria,2,0.2", + "root,1,0.5", + ]; + for line in expected_lines { + assert!(result.contains(line)); + } + } + + #[tokio::test] + async fn test_clean_csv_empty_input() { + let csv_input = "".to_string(); + let result = clean_csv(csv_input).await; + assert!(result.is_ok()); + let output = result.unwrap(); + assert!(output.contains("taxon_name,id,score")); + } +} \ No newline at end of file diff --git a/peptonizer_rust/src/factor_graph.rs b/peptonizer_rust/src/factor_graph.rs new file mode 100644 index 0000000..539c35b --- 
/dev/null +++ b/peptonizer_rust/src/factor_graph.rs @@ -0,0 +1,145 @@ +use std::collections::{HashMap, HashSet}; +use serde::Deserialize; +use csv::ReaderBuilder; +use nori::load_factor_graph_bytes; + +/// Represents a single taxon weight record parsed from a CSV file. +#[derive(Deserialize)] +pub struct TaxonWeight { + pub sequence: String, + pub score: f32, + pub higher_taxa: usize, +} + + +/// Parses a CSV string into a vector of `TaxonWeight` structs. +/// +/// # Arguments +/// * `sequence_scores_csv` - A string containing CSV data for taxon weights. The CSV +/// must include headers: `id, sequence, score, psms, higher_taxa, weight, log_weight`. +/// +/// # Returns +/// Returns a `Result` containing a vector of `TaxonWeight` structs if parsing succeeds. +/// +/// # Errors +/// Returns an error if the CSV cannot be read, or if any record fails deserialization. +pub fn parse_taxon_weights_csv(sequence_scores_csv: String) -> Result, Box> { + let mut rdr = ReaderBuilder::new() + .has_headers(true) + .from_reader(sequence_scores_csv.as_bytes()); + + let mut sequence_scores = Vec::new(); + for record in rdr.deserialize() { + let row: TaxonWeight = record.unwrap(); + sequence_scores.push(row); + } + + Ok(sequence_scores) +} + + +/// Generates a GraphML representation of a factor graph from a CSV string of taxon weights. +/// +/// # Arguments +/// * `sequence_scores_csv` - A string containing CSV data for taxon weights. +/// +/// # Returns +/// Returns a `Result` containing a GraphML string representation of the factor graph. +/// +/// # Errors +/// Returns an error if CSV parsing fails or if any error occurs during graph construction. 
+pub fn generate_graph(sequence_scores_csv: String) -> Result, Box> { + + let sequence_scores = parse_taxon_weights_csv(sequence_scores_csv)?; + + let peptide_taxon_graph = taxon_weights_to_graphml(&sequence_scores); + let factor_graph = load_factor_graph_bytes(&peptide_taxon_graph)?; + Ok(factor_graph) +} + +pub fn taxon_weights_to_graphml(taxon_weights: &Vec) -> String { + + let mut xml = String::new(); + xml.push_str(r#""#); + xml.push_str(r#""#); + xml.push_str(r#" "#); + xml.push_str(r#" "#); + xml.push_str(r#" "#); + + // Keep unique sequence nodes + let mut seen_sequences = HashSet::new(); + // Store first score seen for each sequence + let mut sequence_scores: HashMap<&str, f32> = HashMap::new(); + + for item in taxon_weights { + sequence_scores + .entry(item.sequence.as_str()) + .or_insert(item.score); + } + + // Sequence nodes + for (sequence, score) in &sequence_scores { + if seen_sequences.insert(*sequence) { + xml.push_str(&format!( + r#" + input + [{}, {}] + +"#, + sequence, + 1.0 - score, + score + )); + } + } + + // Taxa nodes + let mut seen_taxa = HashSet::new(); + for item in taxon_weights { + if seen_taxa.insert(item.higher_taxa) { + xml.push_str(&format!( + r#" + output + +"#, + item.higher_taxa + )); + } + } + + // Edges + for item in taxon_weights { + xml.push_str(&format!( + r#" +"#, + item.sequence, + item.higher_taxa + )); + } + xml.push_str(" \n"); + xml.push_str("\n"); + + xml +} + + +#[cfg(test)] +mod tests { + use super::*; + + fn sample_csv() -> String { + "id,sequence,score,psms,higher_taxa,weight,log_weight +1,PEPTIDE1,0.8,3,100,0.5,-0.3 +2,PEPTIDE2,0.6,3,100,0.4,-0.5 +3,PEPTIDE3,0.9,3,200,0.7,-0.1" + .to_string() + } + + #[test] + fn test_parse_taxon_weights_csv() { + let csv = sample_csv(); + let taxa = parse_taxon_weights_csv(csv).unwrap(); + assert_eq!(taxa.len(), 3); + assert!((taxa[1].score - 0.6).abs() < 1e-6); + } +} diff --git a/peptonizer_rust/src/fetch_unipept_taxa.rs b/peptonizer_rust/src/fetch_unipept_taxa.rs new file 
mode 100644 index 0000000..0e210a7 --- /dev/null +++ b/peptonizer_rust/src/fetch_unipept_taxa.rs @@ -0,0 +1,99 @@ +use crate::unipept_communicator::{get_taxa_for_peptides_async, get_descendants_for_taxa_async}; +use crate::http_client::HttpResult; +use crate::weight_taxa::normalize_unipept_responses; +use std::collections::{HashMap, HashSet}; + + +/// Fetches taxa for peptides and filters them by rank and taxon query. +/// +/// # Arguments +/// * `peptides` - JSON string of peptide sequences. +/// * `rank` - Taxonomic rank used for filtering (e.g. "species"). +/// * `taxon_query` - JSON string of taxon IDs to filter against. +/// * `normalize_unipept_responses_flag` - Whether fetched taxa should be normalized to `rank`. +/// +/// # Returns +/// JSON string mapping peptides to filtered taxon IDs. +/// +/// # Panics +/// Panics if input JSON cannot be parsed or if result cannot be serialized. +pub async fn fetch_peptides_and_filter_taxa( + peptides: String, + rank: String, + taxon_query: String, + normalize_unipept_responses_flag: bool +) -> HttpResult { + // Parse arguments + let peptides: Vec = serde_json::from_str(&peptides)?; + let taxon_query_ids: Vec = serde_json::from_str(&taxon_query)?; + + // First we retrieve all taxa associated with the given peptids + let mut peptides_taxa: HashMap> = get_taxa_for_peptides_async(peptides).await?; + + // Then, we make sure to filter the taxa and only keep those that are associated + // to the taxa of interest indicated by the user. Retrieve all (in)direct children + // of the filter taxa provided by the user + let taxa_filter: HashSet = get_descendants_for_taxa_async(taxon_query_ids, rank.clone()).await?; + + // Compute the intersection of the taxa that should be retained and the original list of taxa + for taxa_list in peptides_taxa.values_mut() { + taxa_list.retain(|taxon| taxa_filter.contains(taxon)); + } + + if normalize_unipept_responses_flag { + // Keep key order stable while normalizing taxa vectors in bulk. 
+ let peptide_keys: Vec = peptides_taxa.keys().cloned().collect(); + let mut taxa_vectors: Vec> = peptide_keys + .iter() + .filter_map(|peptide| peptides_taxa.get(peptide).cloned()) + .collect(); + + normalize_unipept_responses(&mut taxa_vectors, &rank) + .await + .map_err(|e| format!("Failed to normalize Unipept responses: {e}"))?; + + for (peptide, normalized_taxa) in peptide_keys.into_iter().zip(taxa_vectors.into_iter()) { + peptides_taxa.insert(peptide, normalized_taxa); + } + } + + Ok(serde_json::to_string(&peptides_taxa)?) +} + + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::Value; + + #[tokio::test] + async fn test_fetch_with_known_peptide_and_species() { + let peptides = serde_json::to_string(&vec!["TATAAAA".to_string()]).unwrap(); + + let taxon_query = serde_json::to_string(&vec![2]).unwrap(); + + let result = fetch_peptides_and_filter_taxa(peptides, "species".to_string(), taxon_query, true).await; + assert!(result.is_ok()); + let result = result.unwrap(); + + let parsed: Value = serde_json::from_str(&result).unwrap(); + assert!(parsed.is_object()); + + assert!(parsed.get("TATAAAA").is_some()); + } + + #[tokio::test] + async fn test_empty_peptides_and_taxa() { + let peptides = "[]".to_string(); + let taxon_query = "[]".to_string(); + + let result = fetch_peptides_and_filter_taxa(peptides, "species".to_string(), taxon_query, true).await; + assert!(result.is_ok()); + let result = result.unwrap(); + + let parsed: Value = serde_json::from_str(&result).unwrap(); + assert!(parsed.is_object()); + + assert_eq!(parsed.as_object().unwrap().len(), 0); + } +} diff --git a/peptonizer_rust/src/http_client.rs b/peptonizer_rust/src/http_client.rs new file mode 100644 index 0000000..9433cc2 --- /dev/null +++ b/peptonizer_rust/src/http_client.rs @@ -0,0 +1,126 @@ +use std::future::Future; +use std::pin::Pin; + +pub type BoxError = Box; +pub type HttpResult = Result; +pub type HttpFuture<'a> = Pin> + 'a>>; + +pub struct HttpClient { + #[cfg(not(target_arch = 
"wasm32"))] + client: reqwest::Client, +} + +impl HttpClient { + pub fn new() -> Self { + Self { + #[cfg(not(target_arch = "wasm32"))] + client: reqwest::Client::new(), + } + } + + pub fn perform_post_request<'a>(&'a self, url: String, payload_json: String) -> HttpFuture<'a> { + Box::pin(async move { + #[cfg(target_arch = "wasm32")] + { + use js_sys::{Function, Promise, Reflect, global}; + use wasm_bindgen::JsCast; + use wasm_bindgen_futures::JsFuture; + use web_sys::{Request, RequestInit, RequestMode, Response}; + + let opts = RequestInit::new(); + opts.set_method("POST"); + opts.set_mode(RequestMode::Cors); + opts.set_body(&payload_json.into()); + + let request = Request::new_with_str_and_init(&url, &opts) + .map_err(|e| format!("Failed to build request: {:?}", e))?; + request + .headers() + .set("Content-Type", "application/json") + .map_err(|e| format!("Failed to set request header: {:?}", e))?; + + // Use globalThis.fetch so this works in both Window and Worker contexts. + let global_this = global(); + let fetch_value = Reflect::get(&global_this, &wasm_bindgen::JsValue::from_str("fetch")) + .map_err(|e| format!("Failed to access global fetch function: {:?}", e))?; + let fetch_fn: Function = fetch_value.dyn_into().map_err(|_| "globalThis.fetch is not callable")?; + let fetch_result = fetch_fn + .call1(&global_this, request.as_ref()) + .map_err(|e| format!("Failed to invoke fetch: {:?}", e))?; + let fetch_promise: Promise = fetch_result.dyn_into().map_err(|_| "fetch did not return a Promise")?; + + let response = JsFuture::from(fetch_promise) + .await + .map_err(|e| format!("Fetch rejected: {:?}", e))?; + let response: Response = response + .dyn_into() + .map_err(|e| format!("Failed to decode fetch response: {:?}", e))?; + + if !response.ok() { + return Err(format!("Status code {}", response.status()).into()); + } + + let response_text = JsFuture::from( + response + .text() + .map_err(|e| format!("Failed to create response text promise: {:?}", e))?, + ) + .await 
+ .map_err(|e| format!("Failed to read response text: {:?}", e))?; + return response_text + .as_string() + .ok_or("Response body is not a UTF-8 string".into()); + } + + #[cfg(not(target_arch = "wasm32"))] + { + let response = self + .client + .post(&url) + .header("Content-Type", "application/json") + .body(payload_json) + .send() + .await?; + + return Ok(response.text().await?); + } + + #[allow(unreachable_code)] + Err("Unsupported target architecture".into()) + }) + } +} + +pub fn create_http_client() -> HttpClient { + HttpClient::new() +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[tokio::test] + async fn test_invalid_url_returns_error() { + let client = HttpClient::new(); + let payload = json!({ "message": "hello" }); + let payload = serde_json::to_string(&payload).unwrap(); + + let result = client + .perform_post_request("http://invalid_url".to_string(), payload) + .await; + assert!(result.is_err()); + } + + #[tokio::test] + async fn test_real_http_post() { + let client = HttpClient::new(); + let payload = json!({ "foo": "bar" }); + let payload = serde_json::to_string(&payload).unwrap(); + + let result = client + .perform_post_request("https://api.unipept.ugent.be".to_string(), payload) + .await; + assert!(result.is_ok()); + } +} \ No newline at end of file diff --git a/peptonizer_rust/src/input_parser.rs b/peptonizer_rust/src/input_parser.rs new file mode 100644 index 0000000..775dc0c --- /dev/null +++ b/peptonizer_rust/src/input_parser.rs @@ -0,0 +1,147 @@ +use std::collections::HashMap; + + +/// Parses peptide data from a TSV string. +/// +/// The TSV is expected to have a header row and contain at least two columns: +/// peptide sequence and score. For each peptide, the maximum score is stored, +/// along with the number of times the peptide appears. +/// +/// # Arguments +/// * `tsv_content` - Input TSV string with peptide data. +/// +/// # Returns +/// A tuple containing: +/// * `HashMap` - Maximum score per peptide. 
+/// * `HashMap` - Count of occurrences per peptide. +/// +/// # Errors +/// Returns an error if lines are malformed, scores cannot be parsed, +/// or the input is otherwise invalid. +#[allow(clippy::type_complexity)] +fn parse_peptides(tsv_content: String) -> Result<(HashMap, HashMap), Box> { + let mut peptides_scores: HashMap = HashMap::new(); + let mut peptides_counts: HashMap = HashMap::new(); + + let mut lines = tsv_content.lines().map(|l| l.trim()).filter(|l| !l.is_empty()); + + // Skip the header + lines.next(); + + for line in lines { + let mut parts = line.split('\t'); + let peptide = parts.next() + .ok_or_else(|| format!("Invalid line (missing peptide): {line}"))? + .to_string(); + let score_str = parts.next() + .ok_or_else(|| format!("Invalid line (missing score): {line}"))?; + let score: f64 = score_str.parse().map_err(|_| { + format!("Invalid line (score not a number): {line} (score={score_str})") + })?; + + // Update counts + let count = peptides_counts.entry(peptide.clone()).or_insert(0); + *count += 1; + + // Update max score + let entry = peptides_scores.entry(peptide).or_insert(f64::NEG_INFINITY); + if score > *entry { + *entry = score; + } + } + + Ok((peptides_scores, peptides_counts)) +} + + +/// Parses peptides from a TSV string and returns JSON representations +/// of scores and counts. +/// +/// # Arguments +/// * `tsv_content` - Input TSV string with peptide data. +/// +/// # Returns +/// A tuple containing: +/// * `String` - JSON of peptide → max score mapping. +/// * `String` - JSON of peptide → occurrence count mapping. +/// +/// # Errors +/// Returns an error if parsing fails or if JSON serialization fails. 
+pub fn parse_input_peptides(tsv_content: String) -> Result<(String, String), Box> { + let (peptides_scores, peptides_counts) = parse_peptides(tsv_content)?; + + // Convert HashMaps to JSON strings + let scores_json = serde_json::to_string(&peptides_scores)?; + let counts_json = serde_json::to_string(&peptides_counts)?; + + Ok((scores_json, counts_json)) +} + + +/// Extracts unique peptides from a TSV string and returns them as JSON. +/// +/// # Arguments +/// * `tsv_content` - Input TSV string with peptide data. +/// +/// # Returns +/// JSON string containing the list of unique peptides. +/// +/// # Errors +/// Returns an error if parsing fails or if JSON serialization fails. +pub fn parse_unique_peptides(tsv_content: String) -> Result> { + let (peptides_scores, _) = parse_peptides(tsv_content)?; + + let peptides: Vec = peptides_scores.keys().cloned().collect(); + let peptides_json = serde_json::to_string(&peptides)?; + + Ok(peptides_json) +} + + +#[cfg(test)] +mod tests { + use super::*; + + fn sample_tsv() -> String { + "peptide\tscore\nPEP1\t0.5\nPEP2\t1.0\nPEP1\t0.8\n".to_string() + } + + #[test] + fn test_parse_peptides_basic() { + let (scores, counts) = parse_peptides(sample_tsv()).unwrap(); + + assert_eq!(scores.len(), 2); + assert_eq!(counts.len(), 2); + + // PEP1 appears twice, max score = 0.8 + assert_eq!(counts.get("PEP1"), Some(&2)); + assert!((scores.get("PEP1").unwrap() - 0.8).abs() < 1e-10); + + // PEP2 appears once, score = 1.0 + assert_eq!(counts.get("PEP2"), Some(&1)); + assert!((scores.get("PEP2").unwrap() - 1.0).abs() < 1e-10); + } + + #[test] + fn test_parse_input_peptides() { + let (scores_json, counts_json) = parse_input_peptides(sample_tsv()).unwrap(); + + let scores: HashMap = serde_json::from_str(&scores_json).unwrap(); + let counts: HashMap = serde_json::from_str(&counts_json).unwrap(); + + assert_eq!(scores["PEP1"], 0.8); + assert_eq!(scores["PEP2"], 1.0); + assert_eq!(counts["PEP1"], 2); + assert_eq!(counts["PEP2"], 1); + } + + #[test] 
+ fn test_parse_unique_peptides() { + let peptides_json = parse_unique_peptides(sample_tsv()).unwrap(); + let peptides: Vec = serde_json::from_str(&peptides_json).unwrap(); + + assert_eq!(peptides.len(), 2); + assert!(peptides.contains(&"PEP1".to_string())); + assert!(peptides.contains(&"PEP2".to_string())); + } +} diff --git a/peptonizer_rust/src/lib.rs b/peptonizer_rust/src/lib.rs new file mode 100644 index 0000000..dc594d7 --- /dev/null +++ b/peptonizer_rust/src/lib.rs @@ -0,0 +1,457 @@ +extern crate serde_json; +extern crate serde; + +mod utils; +mod http_client; +mod random; +mod weight_taxa; +mod zero_lookahead_belief_propagation; +mod factor_graph; +mod fetch_unipept_taxa; +mod unipept_communicator; +mod taxa_clustering; +mod analyse_grid_search; +#[cfg(not(target_arch = "wasm32"))] +mod input_parser; +#[cfg(not(target_arch = "wasm32"))] +mod clean_csv; + +#[cfg(target_arch = "wasm32")] +pub use wasm::*; + +#[cfg(not(target_arch = "wasm32"))] +pub use pyo3::*; + +#[cfg(target_arch = "wasm32")] +mod wasm { + use wasm_bindgen::prelude::*; + use crate::fetch_unipept_taxa::fetch_peptides_and_filter_taxa; + use crate::weight_taxa::perform_taxa_weighing; + use crate::zero_lookahead_belief_propagation::run_belief_propagation; + use crate::factor_graph::generate_graph; + use crate::taxa_clustering::cluster_taxa; + use crate::analyse_grid_search::compute_goodness; + + extern crate wasm_bindgen; + extern crate web_sys; + extern crate wasm_bindgen_futures; + extern crate js_sys; + extern crate console_error_panic_hook; + + /// Fetches taxa for peptides and filters them by rank and taxon query. + /// + /// # Arguments + /// * `peptides` - JSON string of peptide sequences. + /// * `rank` - Taxonomic rank used for filtering (e.g. "species"). + /// * `taxon_query` - JSON string of taxon IDs to filter against. + /// + /// # Returns + /// JSON string mapping peptides to filtered taxon IDs. 
+ /// + /// # Panics + /// Panics if input JSON cannot be parsed or if result cannot be serialized. + #[wasm_bindgen] + pub async fn fetch_unipept_taxa_wasm( + peptides: String, + rank: String, + taxon_query: String, + ) -> Result { + fetch_peptides_and_filter_taxa(peptides, rank, taxon_query, true) + .await + .map_err(|e| JsValue::from_str(&format!("fetch_unipept_taxa_wasm failed: {e}"))) + } + + /// Represents the main pipeline for weighting taxa based on peptide evidence. + /// + /// # Arguments + /// + /// * `pep_taxa` - JSON string mapping peptide sequences to lists of taxon IDs. + /// * `pep_scores` - JSON string mapping peptide sequences to their scores (float). + /// * `pep_psm_counts` - JSON string mapping peptide sequences to their PSM counts (int). + /// * `max_taxa` - Maximum number of taxa to include in output. + /// + /// # Returns + /// + /// Tuple `(sequence_csv, taxa_weights_csv)`: + /// * `sequence_csv` - CSV string of peptide sequences and their weights. + /// * `taxa_weights_csv` - CSV string of taxa weights and uniqueness. + #[wasm_bindgen] + pub async fn perform_taxa_weighing_wasm( + pep_taxa: String, + pep_scores: String, + pep_psm_counts: String, + max_taxa: usize + ) -> Result, JsValue> { + console_error_panic_hook::set_once(); // Enable panic logging + let (sequence_csv, taxa_weights_csv): (String, String) = perform_taxa_weighing(pep_taxa, pep_scores, pep_psm_counts, max_taxa, None) + .await + .map_err(|e| JsValue::from_str(&format!("perform_taxa_weighing_wasm failed: {e}")))?; + + Ok(Box::new([JsValue::from(sequence_csv), JsValue::from(taxa_weights_csv)])) + } + + /// Generates a GraphML representation of a factor graph from a CSV string of sequence scores. + /// + /// # Arguments + /// * `sequence_scores_csv` - A string containing CSV data for sequence scores. + /// + /// # Returns + /// Returns a `Result` containing a GraphML string representation of the factor graph. 
+ /// + /// # Errors + /// Returns an error if CSV parsing fails or if any error occurs during graph construction. + #[wasm_bindgen] + pub fn generate_pepgm_graph_wasm(sequence_scores_csv: String) -> Vec { + let factor_graph_bytes = generate_graph(sequence_scores_csv).unwrap(); + + factor_graph_bytes + } + + /// Runs belief propagation on a factor graph provided as a GraphML string. + /// + /// This function constructs the factor graph, fills in factor tables and priors, + /// splits the graph into connected components, and performs loopy belief propagation + /// on each component. The result is returned as a CSV string. + /// + /// # Arguments + /// + /// * `graph` - GraphML representation of the factor graph. + /// * `alpha` - Noisy-OR factor alpha parameter. + /// * `beta` - Noisy-OR factor beta parameter. + /// * `regularized` - Whether to regularize factor tables to penalize large numbers of parents. + /// * `prior` - Prior belief for taxon nodes. + /// * `max_iter` - Maximum number of belief propagation iterations. + /// * `tol` - Tolerance threshold for message convergence. + /// + /// # Returns + /// + /// CSV string with one row per node containing columns: + /// `[node_name, posterior_probability_1, node_category]` + #[wasm_bindgen] + pub fn execute_pepgm_wasm( + graphs: Vec, + alpha: f32, + beta: f32, + regularized: bool, + prior: f32, + max_iter: Option, + tol: Option + ) -> String { + // console_error_panic_hook::set_once(); // Enable panic logging + + run_belief_propagation(&graphs, alpha, beta, regularized, prior, max_iter, tol).unwrap() + } + + /// Clusters taxa based on peptidome similarity and returns a CSV. + /// + /// # Arguments + /// * `sequence_scores_csv` - Sequence scores as CSV string. + /// * `taxa_weights_csv` - Taxa weights as CSV string. + /// * `similarity_threshold` - Threshold for clustering. + /// + /// # Returns + /// CSV string with taxa and their clusters. 
+ /// + /// # Errors + /// Returns an error if parsing, graph building, or clustering fails. + #[wasm_bindgen] + pub fn cluster_taxa_wasm( + sequence_scores_csv: String, + taxa_weights_csv: String, + similarity_threshold: f32 + ) -> String { + cluster_taxa(sequence_scores_csv, taxa_weights_csv, similarity_threshold).unwrap() + } + + /// Computes a "goodness" score for clustering results by combining + /// ranking similarity (via rank-biased overlap) and diversity (via entropy). + /// + /// # Arguments + /// * `clustered_taxa_weights_csv` - CSV string file containing clustered taxa weights. + /// * `peptonizer_results` - JSON string containing taxa scores produced by Peptonizer. + /// + /// # Returns + /// A `Result>` containing the computed goodness score, + /// or an error if parsing fails. + /// + /// # Errors + /// This function may return an error if the input CSV or JSON cannot be parsed. + #[wasm_bindgen] + pub fn compute_goodness_wasm( + clustered_taxa_weights_csv: String, + peptonizer_results: String + ) -> f64 { + compute_goodness(clustered_taxa_weights_csv, peptonizer_results).unwrap() + } + +} + +#[allow(unsafe_op_in_unsafe_fn)] +#[cfg(not(target_arch = "wasm32"))] +mod pyo3 { + use std::future::Future; + use std::sync::OnceLock; + use pyo3::prelude::*; + use pyo3::types::PyBytes; + use crate::fetch_unipept_taxa::fetch_peptides_and_filter_taxa; + use crate::weight_taxa::perform_taxa_weighing; + use crate::zero_lookahead_belief_propagation::run_belief_propagation; + use crate::factor_graph::generate_graph; + use crate::taxa_clustering::cluster_taxa; + use crate::analyse_grid_search::compute_goodness; + use crate::input_parser::{parse_input_peptides, parse_unique_peptides}; + use crate::clean_csv::clean_csv; + use crate::unipept_communicator::get_names_for_taxa; + + extern crate console_error_panic_hook; + + fn block_on_binding_future(future: F) -> F::Output + where + F: Future, + { + use tokio::runtime::{Builder, Runtime}; + + static TOKIO_RUNTIME: 
OnceLock = OnceLock::new(); + + TOKIO_RUNTIME + .get_or_init(|| { + Builder::new_multi_thread() + .enable_all() + .build() + .expect("Failed to initialize Tokio runtime for Python bindings") + }) + .block_on(future) + } + + /// Parses peptides from a TSV string and returns JSON representations + /// of scores and counts. + /// + /// # Arguments + /// * `tsv_content` - Input TSV string with peptide data. + /// + /// # Returns + /// A tuple containing: + /// * `String` - JSON of peptide → max score mapping. + /// * `String` - JSON of peptide → occurrence count mapping. + /// + /// # Errors + /// Returns an error if parsing fails or if JSON serialization fails. + #[pyfunction] + pub fn parse_input_peptides_py(tsv_content: String) -> (String, String) { + parse_input_peptides(tsv_content).unwrap() + } + + /// Extracts unique peptides from a TSV string and returns them as JSON. + /// + /// # Arguments + /// * `tsv_content` - Input TSV string with peptide data. + /// + /// # Returns + /// JSON string containing the list of unique peptides. + /// + /// # Errors + /// Returns an error if parsing fails or if JSON serialization fails. + #[pyfunction] + pub fn parse_unique_peptides_py(tsv_content: String) -> String { + parse_unique_peptides(tsv_content).unwrap() + } + + /// Fetches taxa for peptides and filters them by rank and taxon query. + /// + /// # Arguments + /// * `peptides` - JSON string of peptide sequences. + /// * `rank` - Taxonomic rank used for filtering (e.g. "species"). + /// * `taxon_query` - JSON string of taxon IDs to filter against. + /// + /// # Returns + /// JSON string mapping peptides to filtered taxon IDs. + /// + /// # Panics + /// Panics if input JSON cannot be parsed or if result cannot be serialized. 
    #[pyfunction]
    pub fn fetch_unipept_taxa_py(
        peptides: String,
        rank: String,
        taxon_query: String,
    ) -> String {
        // The async pipeline is driven to completion on the shared binding runtime.
        // The trailing `false` is the fourth argument of `fetch_peptides_and_filter_taxa`
        // (the wasm binding passes `true` in the same position).
        // Any error returned by the pipeline is unwrapped and therefore panics.
        block_on_binding_future(fetch_peptides_and_filter_taxa(
            peptides,
            rank,
            taxon_query,
            false,
        ))
        .unwrap()
    }

    /// Runs the main pipeline for weighting taxa based on peptide evidence.
    ///
    /// Blocks the calling thread until `perform_taxa_weighing` has completed.
    ///
    /// # Arguments
    ///
    /// * `unipept_responses` - JSON string mapping peptide sequences to lists of taxon IDs.
    /// * `pep_scores` - JSON string mapping peptide sequences to their scores (float).
    /// * `pep_psm_counts` - JSON string mapping peptide sequences to their PSM counts (int).
    /// * `max_taxa` - Maximum number of taxa to include in output.
    /// * `taxa_rank` - Taxonomic rank to normalize taxa to. This binding always forwards
    ///   it as `Some(taxa_rank)`, so the rank is mandatory from Python.
    ///
    /// # Returns
    ///
    /// Tuple `(sequence_csv, taxa_weights_csv)`:
    /// * `sequence_csv` - CSV string of peptide sequences and their weights.
    /// * `taxa_weights_csv` - CSV string of taxa weights and uniqueness.
    ///
    /// # Panics
    ///
    /// Panics if `perform_taxa_weighing` returns an error, because the result is unwrapped.
    #[pyfunction]
    fn perform_taxa_weighing_py(
        unipept_responses: String,
        pep_scores: String,
        pep_psm_counts: String,
        max_taxa: usize,
        taxa_rank: String
    ) -> (String, String) {
        block_on_binding_future(perform_taxa_weighing(unipept_responses, pep_scores, pep_psm_counts, max_taxa, Some(taxa_rank))).unwrap()
    }

    /// Generates a serialized factor graph from a CSV string of taxon weights.
    ///
    /// # Arguments
    /// * `taxa_weights_csv` - A string containing CSV data for taxon weights.
    ///
    /// # Returns
    /// A Python `bytes` object holding the output of `generate_graph`.
    ///
    /// # Panics
    /// Panics if `generate_graph` returns an error (for example when CSV parsing or
    /// graph construction fails), because the result is unwrapped.
    #[pyfunction]
    pub fn generate_pepgm_graph_py(py: Python<'_>, taxa_weights_csv: String) -> Py {
        // NOTE(review): `Py` has no type argument here; the body builds a `PyBytes`,
        // so the generic argument appears to have been lost -- confirm.
        let graph_bytes = generate_graph(taxa_weights_csv).unwrap();

        // Copy the serialized graph into a Python-owned `bytes` object.
        PyBytes::new_bound(py, &graph_bytes).into()
    }

    /// Runs belief propagation on a serialized factor graph and returns the result as CSV.
    ///
    /// This is a thin binding around `run_belief_propagation`. Per the upstream
    /// description, that routine constructs the factor graph, fills in factor tables and
    /// priors, splits the graph into connected components, and performs loopy belief
    /// propagation on each component.
    ///
    /// # Arguments
    ///
    /// * `graph` - Serialized factor graph (presumably the bytes returned by
    ///   `generate_pepgm_graph_py` -- verify against the caller).
    /// * `alpha` - Noisy-OR factor alpha parameter.
    /// * `beta` - Noisy-OR factor beta parameter.
    /// * `regularized` - Whether to regularize factor tables to penalize large numbers of parents.
    /// * `prior` - Prior belief for taxon nodes.
    /// * `max_iter` - Maximum number of belief propagation iterations; defaults to `None`
    ///   on the Python side.
    /// * `tol` - Tolerance threshold for message convergence; defaults to `None` on the
    ///   Python side.
    ///
    /// # Returns
    ///
    /// CSV string with one row per node containing columns:
    /// `[node_name, posterior_probability_1, node_category]`
    ///
    /// # Panics
    ///
    /// Panics if `run_belief_propagation` returns an error, because the result is unwrapped.
    #[pyfunction]
    #[pyo3(signature = (graph, alpha, beta, regularized, prior, max_iter=None, tol=None))]
    pub fn execute_pepgm_py(
        graph: Vec,
        alpha: f32,
        beta: f32,
        regularized: bool,
        prior: f32,
        max_iter: Option,
        tol: Option
    ) -> String {
        // NOTE(review): `Vec` and both `Option`s above have no type argument; the
        // intended element types cannot be determined from this file -- confirm.
        console_error_panic_hook::set_once(); // Enable panic logging

        run_belief_propagation(&graph, alpha, beta, regularized, prior, max_iter, tol).unwrap()
    }

    /// Clusters taxa based on peptidome similarity and returns a CSV.
    ///
    /// # Arguments
    /// * `sequence_scores_csv` - CSV string containing peptide sequence scores.
    /// * `taxa_weights_csv` - Taxa weights as CSV string.
    /// * `similarity_threshold` - Threshold for clustering.
    ///
    /// # Returns
    /// CSV string with taxa and their clusters.
    ///
    /// # Panics
    /// Panics if `cluster_taxa` returns an error (parsing, graph building, or
    /// clustering failure), because the result is unwrapped.
    #[pyfunction]
    pub fn cluster_taxa_py(
        sequence_scores_csv: String,
        taxa_weights_csv: String,
        similarity_threshold: f32
    ) -> String {
        cluster_taxa(sequence_scores_csv, taxa_weights_csv, similarity_threshold).unwrap()
    }

    /// Computes a "goodness" score for clustering results by combining
    /// ranking similarity (via rank-biased overlap) and diversity (via entropy).
    ///
    /// # Arguments
    /// * `clustered_taxa_weights_csv` - CSV string containing clustered taxa weights.
    /// * `peptonizer_results` - JSON string containing taxa scores produced by Peptonizer.
    ///
    /// # Returns
    /// The computed goodness score as an `f64`.
    ///
    /// # Panics
    /// Panics if `compute_goodness` returns an error (for example when the input CSV
    /// or JSON cannot be parsed), because the result is unwrapped.
    #[allow(unsafe_op_in_unsafe_fn)]
    #[pyfunction]
    pub fn compute_goodness_py(
        clustered_taxa_weights_csv: String,
        peptonizer_results: String
    ) -> f64 {
        compute_goodness(clustered_taxa_weights_csv, peptonizer_results).unwrap()
    }

    /// Returns a mapping from taxon ID to taxon name for all taxa provided.
    ///
    /// # Arguments
    /// * `target_taxa` - A list of taxon IDs for which all corresponding taxon names should be retrieved.
    ///
    /// # Panics
    /// Panics if the lookup performed by `get_names_for_taxa` fails or if the resulting
    /// map cannot be serialized to JSON; both results are unwrapped.
    ///
    /// # Returns
    /// A JSON string mapping taxon IDs to their corresponding taxon names.
+ #[pyfunction] + pub fn get_names_for_taxa_py(target_taxa: Vec) -> String { + let names = block_on_binding_future(get_names_for_taxa(&target_taxa)).unwrap(); + serde_json::to_string(&names).unwrap() + } + + /// Read a CSV-file that was produced by the PepGM algorithm and use it to + /// produce a new CSV-file that only contains the taxon-related information + /// and scores. The string produced by this function can be written directly + /// to a valid CSV-file and contains three columns: `taxon_name`, `taxon_id`, + /// and `score`. + /// + /// # Arguments + /// * `csv_content` - A CSV-file (as a string) that has been generated by running the PepGM algorithm. + /// + /// # Returns + /// A `String` containing CSV rows with the columns: `taxon_name,taxon_id,score`. + #[pyfunction] + pub fn clean_csv_py(csv_content: String) -> String { + block_on_binding_future(clean_csv(csv_content)).unwrap() + } + + #[cfg(not(target_arch = "wasm32"))] + #[pymodule] + fn peptonizer_rust(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_function(wrap_pyfunction!(parse_input_peptides_py, m)?)?; + m.add_function(wrap_pyfunction!(parse_unique_peptides_py, m)?)?; + m.add_function(wrap_pyfunction!(fetch_unipept_taxa_py, m)?)?; + m.add_function(wrap_pyfunction!(perform_taxa_weighing_py, m)?)?; + m.add_function(wrap_pyfunction!(generate_pepgm_graph_py, m)?)?; + m.add_function(wrap_pyfunction!(execute_pepgm_py, m)?)?; + m.add_function(wrap_pyfunction!(cluster_taxa_py, m)?)?; + m.add_function(wrap_pyfunction!(compute_goodness_py, m)?)?; + m.add_function(wrap_pyfunction!(clean_csv_py, m)?)?; + m.add_function(wrap_pyfunction!(get_names_for_taxa_py, m)?)?; + Ok(()) + } +} diff --git a/peptonizer_rust/src/random.rs b/peptonizer_rust/src/random.rs new file mode 100644 index 0000000..b083919 --- /dev/null +++ b/peptonizer_rust/src/random.rs @@ -0,0 +1,92 @@ + +use std::collections::HashSet; + + +/// Selects `n` unique random sample indices from a list of weights. 
+/// +/// The selection is based on weighted probabilities, where higher weights increase +/// the likelihood of being chosen. Sampling is without replacement (indices are unique). +/// +/// # Arguments +/// * `weights` - Vector of non-negative weights for each element. +/// * `n` - Number of unique indices to select. +/// +/// # Returns +/// A `HashSet` containing the selected indices. +/// +/// # Errors +/// This function will panic if: +/// * `weights` is empty. +/// * `n` is larger than `weights.len()`. +// #[cfg(not(target_arch = "wasm32"))] +pub fn select_random_samples_with_weights( + weights: Vec, + n: usize, +) -> Result, Box> { + use rand::prelude::*; + let mut rng = thread_rng(); + + let mut keys: Vec<(f64, usize)> = weights.iter().enumerate() + .filter(|&(_, &w)| w > 0.0) + .map(|(i, &w)| { + let u: f64 = rng.gen_range(0.0..1.0); + let key = u.powf(1.0/w); + (key, i) + }).collect(); + + keys.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap()); + let selected_idx: HashSet = keys.iter() + .take(n.min(keys.len())) + .map(|&(_, i)| i) + .collect(); + + Ok(selected_idx) +} + + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::HashSet; + + #[test] + fn test_basic_sampling() { + let weights = vec![1.0, 2.0, 3.0]; + let samples = select_random_samples_with_weights(weights.clone(), 2); + assert!(samples.is_ok()); + let samples = samples.unwrap(); + + assert_eq!(samples.len(), 2); + for &idx in &samples { + assert!(idx < weights.len()); + } + } + + #[test] + fn test_full_sampling() { + let weights = vec![0.5, 1.5, 2.0]; + let samples = select_random_samples_with_weights(weights.clone(), 3); + assert!(samples.is_ok()); + let samples = samples.unwrap(); + + // Must return all indices + assert_eq!(samples, HashSet::from([0, 1, 2])); + } + + #[test] + fn test_heavy_weight_bias() { + let weights = vec![1000.0, 0.0001, 0.0001]; + let mut counts = vec![0; 3]; + + for _ in 0..100 { + let s = select_random_samples_with_weights(weights.clone(), 1); + 
assert!(s.is_ok()); + let s = s.unwrap(); + let idx = *s.iter().next().unwrap(); + counts[idx] += 1; + } + + // Index 0 should dominate + assert!(counts[0] > 90); + } +} diff --git a/peptonizer_rust/src/taxa_clustering.rs b/peptonizer_rust/src/taxa_clustering.rs new file mode 100644 index 0000000..b941f75 --- /dev/null +++ b/peptonizer_rust/src/taxa_clustering.rs @@ -0,0 +1,316 @@ +use std::collections::{HashMap, HashSet}; +use serde::{Serialize, Deserialize, Deserializer}; +use csv::{ReaderBuilder, WriterBuilder}; +use crate::factor_graph::{parse_taxon_weights_csv, TaxonWeight}; + + + +/// Represents a taxonomic unit with attributes used for clustering. +#[derive(Deserialize, Serialize, Debug, Clone)] +pub struct Taxon { + /// Unique identifier of the taxon. + pub id: usize, + /// Identifier of the higher-level taxon it belongs to. + pub higher_taxa: usize, + /// Weight of the taxon, scaled for comparison. + pub scaled_weight: f32, + /// Whether the taxon is unique in the dataset. + pub unique: bool, + + /// IDs of taxa belonging to the same cluster. + /// Serialized as a string, deserialized back into a vector. + #[serde(default, serialize_with = "vec_to_string", deserialize_with = "string_to_vec")] + pub cluster_members: Vec +} + + +/// Converts a `Vec` to a serialized string representation. +/// +/// # Arguments +/// * `vec` - Reference to the vector to be serialized. +/// * `serializer` - Serializer provided by Serde. +/// +/// # Returns +/// Serialized string wrapped in the serializer's `Ok` type. +/// +/// # Errors +/// Returns an error if serialization fails. +fn vec_to_string(vec: &[usize], serializer: S) -> Result +where + S: serde::Serializer, +{ + let joined = &format!( + "[{}]", + vec.iter().map(|x| x.to_string()).collect::>().join(", ") + ); + serializer.serialize_str(joined) +} + + +/// Converts a serialized string back into a `Vec`. +/// +/// # Arguments +/// * `deserializer` - Deserializer provided by Serde. 
+/// +/// # Returns +/// Vector of integers parsed from the string. +/// +/// # Errors +/// Returns an error if the string cannot be deserialized or parsed into integers. +pub fn string_to_vec<'de, D>(deserializer: D) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + let s: &str = Deserialize::deserialize(deserializer)?; + let vec = s[1..(s.len()-1)].split(',') + .filter_map(|item| item.trim().parse::().ok()) + .collect(); + Ok(vec) +} + + +/// Parses a CSV string into a list of `Taxon`. +/// +/// # Arguments +/// * `taxa_weights_csv` - CSV content as string. +/// +/// # Returns +/// Vector of `Taxon`. +/// +/// # Errors +/// Returns an error if CSV parsing fails. +pub fn parse_taxon_csv(taxa_weights_csv: String) -> Result, Box> { + let mut rdr = ReaderBuilder::new() + .has_headers(true) + .from_reader(taxa_weights_csv.as_bytes()); + + let mut taxa_weights = Vec::new(); + for record in rdr.deserialize() { + let row: Taxon = record?; + taxa_weights.push(row); + } + + Ok(taxa_weights) +} + + +/// Serializes a list of `Taxon` into CSV format. +/// +/// # Arguments +/// * `taxa` - List of taxa to serialize. +/// +/// # Returns +/// CSV as a string. +/// +/// # Errors +/// Returns an error if serialization fails. +pub fn generate_taxa_cluster_csv(taxa: Vec) -> Result> { + let mut wtr = WriterBuilder::new().from_writer(vec![]); + + for taxon in &taxa { + wtr.serialize(taxon)?; + } + + let data = String::from_utf8(wtr.into_inner()?)?; + + Ok(data) +} + + +/// Clusters taxa based on peptidome similarity and returns a CSV. +/// +/// # Arguments +/// * `graph_xml` - GraphML as string. +/// * `taxa_weights_csv` - Taxa weights as CSV string. +/// * `similarity_threshold` - Threshold for clustering. +/// +/// # Returns +/// CSV string with taxa and their clusters. +/// +/// # Errors +/// Returns an error if parsing, graph building, or clustering fails. 
+pub fn cluster_taxa(sequence_scores_csv: String, taxa_weights_csv: String, similarity_threshold: f32) -> Result> { + + let sequence_scores = parse_taxon_weights_csv(sequence_scores_csv)?; + let taxa_weights = parse_taxon_csv(taxa_weights_csv)?; + + let peptidome_dict = get_peptides_per_taxon(&sequence_scores)?; + let (similarities, taxon_index) = compute_detected_peptidome_similarity(peptidome_dict); + + let taxa_weights: Vec = taxa_weights + .into_iter() + .filter(|tw| taxon_index.contains_key(&tw.higher_taxa)) + .collect(); + + let higher_taxa: Vec = taxa_weights.iter().map(|tw| tw.higher_taxa).collect(); + let mut weight_sorted_taxa: Vec = higher_taxa.clone(); + let mut taxa_clusters: Vec> = Vec::new(); + + let mut cluster_heads: Vec = Vec::new(); + + while ! weight_sorted_taxa.is_empty() { + let taxon1 = weight_sorted_taxa[0]; + let mut cluster_list: Vec = Vec::new(); + cluster_heads.push(taxon1); + + for &taxon2 in &higher_taxa { + if similarities[taxon_index[&taxon2]][taxon_index[&taxon1]] > similarity_threshold { + cluster_list.push(taxon2); + if weight_sorted_taxa.contains(&taxon2) { + weight_sorted_taxa.retain(|&taxon| taxon != taxon2); + } + } + } + + taxa_clusters.push(cluster_list); + } + + let mut cluster_weight_sorted_taxa: Vec = taxa_weights.into_iter() + .filter(|tw| cluster_heads.contains(&tw.higher_taxa)) + .collect(); + // TODO: should we also add rows of taxa_weights where higher_taxa appear in taxa_clusters? + // This doesn't seem to do anything in the python code (Bug probably) + for (taxon, cluster_members) in cluster_weight_sorted_taxa.iter_mut().zip(taxa_clusters.iter()) { + taxon.cluster_members = cluster_members.clone(); + } + + generate_taxa_cluster_csv(cluster_weight_sorted_taxa) +} + + +/// Builds a dictionary of peptides per taxon. +/// +/// # Arguments +/// * `graph` - Factor graph reference. +/// +/// # Returns +/// Map from taxon ID to peptide set. +/// +/// # Errors +/// Returns an error if node parsing fails. 
+fn get_peptides_per_taxon(taxon_weights: &Vec) -> Result>, Box> { + let mut peptidome_dict = HashMap::new(); + + // maps unique sequence string -> generated sequence_id + let mut sequence_to_id: HashMap = HashMap::new(); + let mut next_sequence_id = 0usize; + for tw in taxon_weights { + // get or create sequence_id + let sequence_id = match sequence_to_id.get(&tw.sequence) { + Some(id) => *id, + None => { + let id = next_sequence_id; + next_sequence_id += 1; + sequence_to_id.insert(tw.sequence.clone(), id); + id + } + }; + // insert sequence_id into higher_taxa set + peptidome_dict + .entry(tw.higher_taxa) + .or_insert_with(HashSet::new) + .insert(sequence_id); + } + + Ok(peptidome_dict) +} + + +/// Computes similarity matrix and taxon index map. +/// +/// # Arguments +/// * `peptidome_dict` - Map of taxon to peptides. +/// +/// # Returns +/// Tuple of (similarity matrix, taxon index map). +fn compute_detected_peptidome_similarity(peptidome_dict: HashMap>) -> (Vec>, HashMap) { + let mut sim_matrix = Vec::new(); + let mut taxon_index: HashMap = HashMap::new(); + + let peptidome_keys = peptidome_dict.keys(); + for (index, taxon1) in peptidome_keys.clone().enumerate() { + taxon_index.insert(*taxon1, index); + let set1 = &peptidome_dict[taxon1]; + let mut sim_row = Vec::new(); + for taxon2 in peptidome_keys.clone() { + let set2 = &peptidome_dict[taxon2]; + let shared = set1.intersection(set2).count(); + let sim: f32 = if set2.is_empty() { + 0.0 + } else { + shared as f32 / set2.len() as f32 + }; + + sim_row.push(sim); + } + sim_matrix.push(sim_row); + } + + (sim_matrix, taxon_index) +} + + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::{HashMap, HashSet}; + + #[test] + fn test_vec_to_string_and_string_to_vec() { + let values = vec![1, 2, 3]; + let serialized = serde_json::to_string(&values).unwrap(); + assert_eq!(serialized, "[1,2,3]"); + + let deserialized: Vec = serde_json::from_str(&serialized).unwrap(); + assert_eq!(deserialized, values); + } + 
+ #[test] + fn test_parse_taxon_csv_and_generate_taxa_cluster_csv() { + let csv_data = "\ +id,higher_taxa,scaled_weight,unique +1,10,0.5,true +2,20,0.8,false +"; + + let taxa = parse_taxon_csv(csv_data.to_string()).unwrap(); + assert_eq!(taxa.len(), 2); + assert_eq!(taxa[0].id, 1); + assert_eq!(taxa[1].higher_taxa, 20); + + let out_csv = generate_taxa_cluster_csv(taxa).unwrap(); + assert!(out_csv.contains("id,higher_taxa,scaled_weight,unique,cluster_members")); + assert!(out_csv.contains("10")); + } + + #[test] + fn test_compute_detected_peptidome_similarity() { + let mut peptidome_dict: HashMap> = HashMap::new(); + peptidome_dict.insert(1, HashSet::from([1, 2])); + peptidome_dict.insert(2, HashSet::from([2, 3])); + + let (sim_matrix, taxon_index) = compute_detected_peptidome_similarity(peptidome_dict); + println!("{:?}", sim_matrix); + + assert_eq!(sim_matrix.len(), 2); + assert_eq!(sim_matrix[0].len(), 2); + assert!(taxon_index.contains_key(&1)); + assert!(taxon_index.contains_key(&2)); + assert!(sim_matrix[0][1] > 0.0); + } + + #[test] + fn test_generate_taxa_cluster_csv_roundtrip() { + let taxa = vec![ + Taxon { id: 1, higher_taxa: 10, scaled_weight: 0.5, unique: true, cluster_members: vec![10, 11] }, + Taxon { id: 2, higher_taxa: 20, scaled_weight: 0.8, unique: false, cluster_members: vec![20] }, + ]; + + let csv_string = generate_taxa_cluster_csv(taxa.clone()).unwrap(); + let parsed = parse_taxon_csv(csv_string).unwrap(); + + assert_eq!(parsed.len(), 2); + assert_eq!(parsed[0].cluster_members, vec![10, 11]); + assert_eq!(parsed[1].cluster_members, vec![20]); + } +} diff --git a/peptonizer_rust/src/unipept_communicator.rs b/peptonizer_rust/src/unipept_communicator.rs new file mode 100644 index 0000000..3158040 --- /dev/null +++ b/peptonizer_rust/src/unipept_communicator.rs @@ -0,0 +1,435 @@ +use std::collections::{HashMap, HashSet}; +use std::cmp::min; +use crate::http_client::*; +use futures::future::join_all; +use serde::{Serialize, Deserialize}; +use 
serde_json::{ Value }; + + +/// Base URL for the UniPept API +const UNIPEPT_URL: &str = "https://api.unipept.ugent.be"; +/// Endpoint for mapping peptides to filtered taxa +const UNIPEPT_PEPT2FILTERED_ENDPOINT: &str = "/api/v2/pept2taxa"; +/// Endpoint for retrieving taxonomic lineages +const UNIPEPT_TAXONOMY_ENDPOINT: &str = "/api/v2/taxonomy"; + +/// Maximum number of peptides per request to the peptide-to-taxa endpoint +const UNIPEPT_PEPTIDES_BATCH_SIZE: usize = 2000; + +/// Maximum number of taxa per request to the taxonomy endpoint +const TAXONOMY_ENDPOINT_BATCH_SIZE: usize = 100; + + +/// Standard NCBI taxonomy ranks for lineage retrieval +const NCBI_RANKS: &[&str] = &[ + "domain", + "realm", + "subkingdom", + "superphylum", + "phylum", + "subphylum", + "superclass", + "class", + "subclass", + "superorder", + "order", + "suborder", + "infraorder", + "superfamily", + "family", + "subfamily", + "tribe", + "subtribe", + "genus", + "subgenus", + "species_group", + "species_subgroup", + "species", + "subspecies", + "strain", + "varietas", + "forma" +]; + + +/// Payload structure for requesting taxonomy lineages from UniPept +#[derive(Serialize, Deserialize, Debug)] +pub struct HTTPTaxonomyPayload { + input: Vec, + extra: bool +} + +/// Payload structure for mapping peptides to taxa +#[derive(Serialize, Deserialize, Debug)] +pub struct HTTPPept2TaxaPayload { + input: Vec, + compact: bool, + tryptic: bool +} + +/// Response structure for peptide-to-taxa mapping +#[derive(Serialize, Deserialize, Debug)] +pub struct HTTPPept2TaxaResponse { + peptide: String, + taxa: Vec +} + +/// Payload structure for retrieving descendants of taxa at specified ranks +#[derive(Serialize, Deserialize, Debug)] +pub struct HTTPTaxonomyDescendantsPayload { + input: Vec, + descendants: bool, + descendants_ranks: Vec +} + +/// Response structure for retrieving descendants of a taxon +#[derive(Serialize, Deserialize, Debug)] +pub struct HTTPTaxonomyDescendantsResponse { + taxon_id: usize, + 
taxon_name: String, + taxon_rank: String, + descendants: Vec +} + +/// Represents a response from the UniPept taxonomy API +#[cfg(not(target_arch = "wasm32"))] +#[derive(Serialize, Deserialize, Debug)] +struct TaxonomyResponse { + taxon_id: usize, + taxon_name: String, +} + + +/// Parses a JSON string returned by the UniPept API into a vector of key-value maps +/// +/// # Arguments +/// * `http_response` - JSON string from UniPept API +/// +/// # Returns +/// Vector of hash maps, where each key maps to an `Option` value. Only numeric or null values are retained. +#[allow(clippy::type_complexity)] +fn parse_response_json_string(http_response: &str) -> HttpResult>>> { + let http_response_map: Vec>> = serde_json::from_str::>>(http_response)? + .into_iter() + .map(|mut obj: HashMap| { + // Remove the key-value pair where the value is a string + obj.retain(|_, v| v.is_null() || v.is_number()); + + // Convert the remaining keys and values to `HashMap>` + obj.into_iter() + .map(|(key, value)| { + let value = if value.is_number() { + Some(value.as_i64().unwrap() as usize) + } else { + None + }; + (key, value) + }) + .collect() + }) + .collect(); + + Ok(http_response_map) +} + +/// Retrieves the unique lineage taxa IDs at a specified taxonomic rank. +/// +/// This function queries the UniPept taxonomy API for the given `target_taxa` and extracts +/// the taxon IDs at the specified `taxa_rank`. To minimize API requests, it uses a cache +/// (`lineage_cache`) to store previously fetched lineages. +/// +/// # Arguments +/// +/// * `target_taxa` - A reference to a vector of taxon IDs for which the lineage is requested. +/// * `taxa_rank` - The target taxonomic rank (e.g., "species", "genus") at which the unique lineage is extracted. +/// * `lineage_cache` - A mutable reference to a hash map that stores previously fetched lineages. +/// +/// # Returns +/// +/// A vector of unique taxon IDs corresponding to the specified `taxa_rank`. 
+/// +/// # Panics +/// +/// The function will panic if: +/// - The `taxa_rank` does not exist in the predefined `NCBI_RANKS`. +pub async fn get_unique_lineage_at_specified_rank_async(target_taxa: &[usize], taxa_rank: &str, lineage_cache: &mut HashMap>>) -> HttpResult> { + + let url: String = [UNIPEPT_URL, UNIPEPT_TAXONOMY_ENDPOINT].concat(); + + // Remove duplicates from input + let target_taxa: HashSet = target_taxa.iter().cloned().collect(); + + // Prepare a list of taxa that are not yet in the cache + let taxa_to_request: Vec = target_taxa.iter().filter(| tax_id | ! lineage_cache.contains_key(tax_id)).cloned().collect(); + + let http_client = &create_http_client(); + let mut pending_requests = Vec::new(); + + for i in (0..taxa_to_request.len()).step_by(TAXONOMY_ENDPOINT_BATCH_SIZE) { + + let batch_size: usize = std::cmp::min(TAXONOMY_ENDPOINT_BATCH_SIZE, taxa_to_request.len() - i); + let batch: Vec = taxa_to_request[i..(i + batch_size)].to_vec(); + let payload = HTTPTaxonomyPayload { input: batch, extra: true }; + let payload_json = serde_json::to_string(&payload)?; + + pending_requests.push(http_client.perform_post_request(url.clone(), payload_json)); + } + + let responses = join_all(pending_requests).await; + + // Parse responses one-by-one after all requests have completed. + for (batch_idx, http_response) in responses.into_iter().enumerate() { + let http_response: String = http_response + .map_err(|e| format!("Failed to retrieve taxonomy data for batch {}. 
Error message: {}", batch_idx, e))?; + let http_response = parse_response_json_string(&http_response)?; + + for lineage_json in http_response { + let lineage_json: HashMap> = lineage_json; + let lineage: Vec> = NCBI_RANKS.iter() + .filter_map(|key| lineage_json.get(&format!("{key}_id")).cloned()) + .collect(); + let taxon_id: usize = lineage_json.get("taxon_id").ok_or("Taxon ID not in lineage")?.ok_or("Taxon ID is None")?; + lineage_cache.insert(taxon_id, lineage); + } + + } + + let rank_idx = NCBI_RANKS.iter().position(|&ncbi_rank| ncbi_rank == taxa_rank).ok_or("Taxa rank not found in NCBI ranks")?; + let lineage: HashSet = target_taxa.iter() + .filter_map(|taxon| lineage_cache.get(taxon).and_then(|lineage| lineage[rank_idx])) + .collect(); + let lineage: Vec = lineage.into_iter().collect(); + + Ok(lineage) +} + +/// Queries Unipept and returns all the taxa that are associated with the given list of peptides. +/// +/// For each peptide in the input, an entry in the output map is created, which points to the +/// taxon IDs associated with this peptide. +/// +/// # Arguments +/// +/// * `peptides` - A list of peptide sequences for which all associated taxa should be queried. +/// +/// # Errors +/// +/// Returns an error if the Unipept API server responds with an error or if a network issue occurs. +/// +/// # Returns +/// +/// A map from each peptide in the input list to its associated taxa IDs. 
+pub async fn get_taxa_for_peptides_async(peptides: Vec) -> HttpResult>> { + + let url = [UNIPEPT_URL, UNIPEPT_PEPT2FILTERED_ENDPOINT].concat(); + + let mut output = HashMap::new(); + + let http_client = &create_http_client(); + let mut pending_requests = Vec::new(); + + for i in (0..peptides.len()).step_by(UNIPEPT_PEPTIDES_BATCH_SIZE) { + + let end_batch = min(i+UNIPEPT_PEPTIDES_BATCH_SIZE, peptides.len()); + let batch = peptides[i..end_batch].to_vec(); + let payload = HTTPPept2TaxaPayload { input: batch, compact: true, tryptic: true }; + let payload_json = serde_json::to_string(&payload)?; + + pending_requests.push(http_client.perform_post_request(url.clone(), payload_json)); + } + + let responses = join_all(pending_requests).await; + + // Parse responses one-by-one after all requests have completed. + for (batch_idx, http_response) in responses.into_iter().enumerate() { + let http_response: String = http_response + .map_err(|e| format!("Failed to retrieve taxa data for batch {}. Error message: {}", batch_idx, e))?; + + let http_response = serde_json::from_str::>(&http_response)?; + + for peptide_data in &http_response { + let original_taxa: Vec = peptide_data.taxa.clone(); + output.insert(peptide_data.peptide.clone(), original_taxa); + } + } + + Ok(output) +} + +/// Returns a list of all taxon IDs that are descendants of the given taxa in `target_taxa`. +/// +/// # Arguments +/// +/// * `target_taxa` - A list of taxon IDs for which all descendants at a specific NCBI rank (and lower) should be retrieved. +/// * `descendants_rank` - The maximum rank that each of the descendants should have in the NCBI taxonomy. +/// All descendants that are defined at this rank or deeper are reported. +/// +/// # Errors +/// +/// Returns an error if the Unipept API server responds with an error, or if something goes wrong +/// with the network. +/// +/// # Returns +/// +/// A list of taxon IDs that meet the given rank criteria. 
+pub async fn get_descendants_for_taxa_async(target_taxa: Vec, descendant_rank: String) -> HttpResult> { + + let url = [UNIPEPT_URL, UNIPEPT_TAXONOMY_ENDPOINT].concat(); + let mut all_descendants = HashSet::new(); + + // We need to get all children at the requested level, AND at lower levels. That's what we are using the ranks array for. + let rank_idx = NCBI_RANKS.iter().position(|&ncbi_rank| ncbi_rank == descendant_rank).ok_or("descendants rank not found in NCBI ranks")?; + let descentants_ranks: Vec = NCBI_RANKS[rank_idx..].iter().map(|&s| s.to_string()).collect(); + + let http_client = &create_http_client(); + let mut pending_requests = Vec::new(); + + for i in (0..target_taxa.len()).step_by(TAXONOMY_ENDPOINT_BATCH_SIZE) { + + let end_batch = min(i+TAXONOMY_ENDPOINT_BATCH_SIZE, target_taxa.len()); + let batch = target_taxa[i..end_batch].to_vec(); + let payload = HTTPTaxonomyDescendantsPayload { input: batch, descendants: true, descendants_ranks: descentants_ranks.clone() }; + let payload_json = serde_json::to_string(&payload)?; + + pending_requests.push(http_client.perform_post_request(url.clone(), payload_json)); + } + + let responses = join_all(pending_requests).await; + + // Parse responses one-by-one after all requests have completed. + for (batch_idx, http_response) in responses.into_iter().enumerate() { + let http_response = http_response + .map_err(|e| format!("Failed to retrieve taxonomy data for batch {}. Error message: {}", batch_idx, e))?; + let http_response = serde_json::from_str::>(&http_response)?; + + for response in http_response { + all_descendants.extend(response.descendants); + } + } + + Ok(all_descendants) +} + +/// Returns a mapping from taxon ID to taxon name for all taxa provided. +/// +/// # Arguments +/// * `target_taxa` - A list of taxon IDs for which all corresponding taxon names should be retrieved. 
+/// +/// # Errors +/// Returns an error if the Unipept API server responds with a non-success status code +/// or if something goes wrong with the network or JSON parsing. +/// +/// # Returns +/// A `HashMap` mapping taxon IDs to their corresponding taxon names. +#[cfg(not(target_arch = "wasm32"))] +pub async fn get_names_for_taxa(target_taxa: &[usize]) -> HttpResult> { + let url = format!("{UNIPEPT_URL}{UNIPEPT_TAXONOMY_ENDPOINT}"); + let mut output: HashMap = HashMap::new(); + + let http_client = &create_http_client(); + let mut pending_requests = Vec::new(); + + for i in (0..target_taxa.len()).step_by(TAXONOMY_ENDPOINT_BATCH_SIZE) { + let batch: Vec = target_taxa[i..std::cmp::min(i + TAXONOMY_ENDPOINT_BATCH_SIZE, target_taxa.len())] + .to_vec(); + + let payload = serde_json::json!({ + "input": batch + }); + let payload_json = serde_json::to_string(&payload)?; + + pending_requests.push(http_client.perform_post_request(url.clone(), payload_json)); + } + + let responses = join_all(pending_requests).await; + + // Parse responses one-by-one after all requests have completed. 
+ for http_response in responses { + let http_response = http_response.map_err(|e| format!("Communication error: {e}"))?; + + let http_response = serde_json::from_str::>(&http_response)?; + + for response in http_response { + output.insert(response.taxon_id, response.taxon_name); + } + } + + Ok(output) +} + + + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::HashMap; + + #[test] + fn test_parse_response_json_string() { + let http_response = r#" + [ + {"taxon_id": 1, "species_id": 9606, "genus_id": null, "name": "Homo sapiens"}, + {"taxon_id": 2, "species_id": null, "genus_id": 9605, "name": "Pan troglodytes"} + ] + "#; + + let parsed = parse_response_json_string(http_response); + assert!(parsed.is_ok()); + let parsed = parsed.unwrap(); + + assert_eq!(parsed.len(), 2); + assert_eq!(parsed[0].get("taxon_id"), Some(&Some(1))); + assert_eq!(parsed[0].get("species_id"), Some(&Some(9606))); + assert_eq!(parsed[0].get("genus_id"), Some(&None)); + assert!(parsed[0].get("name").is_none()); + assert_eq!(parsed[1].get("taxon_id"), Some(&Some(2))); + assert_eq!(parsed[1].get("genus_id"), Some(&Some(9605))); + } + + #[tokio::test] + async fn test_get_unique_lineage_at_specified_rank_with_cache() { + let mut lineage_cache: HashMap>> = HashMap::new(); + lineage_cache.insert(1, vec![Some(1), Some(10), Some(100)]); + lineage_cache.insert(2, vec![Some(2), Some(20), Some(200)]); + + let target_taxa = vec![1, 2]; + let rank = NCBI_RANKS[1]; + let lineage = get_unique_lineage_at_specified_rank_async(&target_taxa, rank, &mut lineage_cache).await; + assert!(lineage.is_ok()); + let lineage = lineage.unwrap(); + + assert!(lineage.contains(&10)); + assert!(lineage.contains(&20)); + assert_eq!(lineage.len(), 2); + } + + #[tokio::test] + async fn test_get_descendants_for_taxa_structure() { + let descendants = get_descendants_for_taxa_async(vec![200, 701], "species".to_string()).await; + assert!(descendants.is_ok()); + let descendants = descendants.unwrap(); + + 
assert!(descendants.len() == 4); + } + + #[tokio::test] + async fn test_get_names_for_taxa_structure() { + let taxa = vec![1, 2]; + let result = get_names_for_taxa(&taxa).await; + assert!(result.is_ok()); + + let names = result.unwrap(); + assert_eq!(names.get(&1).unwrap(), "root"); + assert_eq!(names.get(&2).unwrap(), "Bacteria"); + } + + #[tokio::test] + async fn test_get_taxa_for_peptides_structure() { + let peptides = vec!["AAEEAAAA".to_string(), "AAAAEEA".to_string()]; + let result = get_taxa_for_peptides_async(peptides).await; + assert!(result.is_ok()); + let result = result.unwrap(); + + assert_eq!(result.get("AAAAEEA").unwrap().len(), 3); + } +} diff --git a/peptonizer_rust/src/utils.rs b/peptonizer_rust/src/utils.rs new file mode 100644 index 0000000..51e724e --- /dev/null +++ b/peptonizer_rust/src/utils.rs @@ -0,0 +1,22 @@ +/// Cross-platform logging utilities for Rust/WASM. +/// +/// These functions provide a unified interface for logging messages to the console, +/// both when running natively (non-WASM) and in a WebAssembly (WASM) environment. + +#[cfg(target_arch = "wasm32")] +use wasm_bindgen::prelude::*; + +#[cfg(target_arch = "wasm32")] +#[wasm_bindgen] +extern "C" { + /// Logs a message to the JavaScript console (`console.log`) when running in WASM. + #[wasm_bindgen(js_namespace = console)] + pub fn log(s: &str); +} + +/// Logs a message to the console when running natively (non-WASM). 
+#[cfg(not(target_arch = "wasm32"))] +pub fn log(s: &str) { + println!("{s}"); +} + diff --git a/peptonizer_rust/src/weight_taxa.rs b/peptonizer_rust/src/weight_taxa.rs new file mode 100644 index 0000000..2cb8ca8 --- /dev/null +++ b/peptonizer_rust/src/weight_taxa.rs @@ -0,0 +1,348 @@ +use std::collections::{HashMap, HashSet}; +use crate::utils::log; +use crate::random::select_random_samples_with_weights; +use csv::Writer; +use crate::unipept_communicator::get_unique_lineage_at_specified_rank_async; + +/// Represents the main pipeline for weighting taxa based on peptide evidence. +/// +/// # Arguments +/// +/// * `pep_taxa` - JSON string mapping peptide sequences to lists of taxon IDs. +/// * `pep_scores` - JSON string mapping peptide sequences to their scores (float). +/// * `pep_psm_counts` - JSON string mapping peptide sequences to their PSM counts (int). +/// * `max_taxa` - Maximum number of taxa to include in output. +/// * `taxa_rank` - Optional NCBI rank at which taxa should be normalized. +/// If `None`, input taxa are assumed to already be normalized. +/// +/// # Returns +/// +/// Tuple `(sequence_csv, taxa_weights_csv)`: +/// * `sequence_csv` - CSV string of peptide sequences and their weights. +/// * `taxa_weights_csv` - CSV string of taxa weights and uniqueness. 
+pub async fn perform_taxa_weighing( + pep_taxa: String, + pep_scores: String, + pep_psm_counts: String, + max_taxa: usize, + taxa_rank: Option +) -> Result<(String, String), Box> { + log("Parsing Unipept responses from disk..."); + let pep_taxa: HashMap> = serde_json::from_str(&pep_taxa)?; + + let sequences: Vec = pep_taxa.keys().map(|seq| seq.to_owned()).collect(); + + let mut taxa: Vec> = pep_taxa.into_values().collect(); + + if let Some(taxa_rank) = taxa_rank { + log("Started mapping all taxon ids to the specified rank..."); + normalize_unipept_responses(&mut taxa, &taxa_rank).await?; + } else { + log("Skipping taxon normalization because taxa_rank is None..."); + } + + let chosen_idx: HashSet = weighted_random_sample(&taxa, 10000)?; + + log(&format!("Using {} sequences as input...", chosen_idx.len())); + + log("Normalizing peptides and converting to vectors..."); + + let sequences: Vec = chosen_idx.iter().map(|idx| sequences[*idx].to_owned()).collect(); + let taxa: Vec> = chosen_idx.iter().map(|idx| taxa[*idx].to_owned()).collect(); + + // Parse scores from JSON string to hashmap, only keep the randomly selected samples. + let pep_scores_map: HashMap = serde_json::from_str(&pep_scores)?; + let mut pep_scores: Vec = vec![0.0; sequences.len()]; + for i in 0..sequences.len() { + pep_scores[i] = pep_scores_map[&sequences[i]]; + } + + // parse counts from JSON string to hashmap, only keep the randomly selected samples. + let pep_psm_counts_map: HashMap = serde_json::from_str(&pep_psm_counts)?; + let mut pep_psm_counts: Vec = vec![0; sequences.len()]; + for i in 0..sequences.len() { + pep_psm_counts[i] = pep_psm_counts_map[&sequences[i]]; + } + + /* Score the degeneracy of a taxa, i.e., + how conserved a peptide sequence is between taxa. + map all taxids in the list in the taxa column back to their taxid at species level (or the rank specified by the user) + Right now, HigherTaxa is simply a copy of taxa. This step still needs to be optimized. 
+ Move taxa to highertaxa because taxa is not used anymore. + */ + let higher_taxa: Vec> = taxa; + + // Divide the number of PSMs of a peptide by the number of taxa the peptide is associated with, exponentiated by 3 + log("Started dividing the number of PSMS of a peptide by the number the peptide is associated with..."); + let weights: Vec = pep_psm_counts.iter() + .zip(higher_taxa.iter().map(|taxa| taxa.len().pow(3))) + .map(|(&count, len_cube)| count as f32 / len_cube as f32) + .collect(); + + let unique_psm_taxa: HashSet = higher_taxa.iter() + .filter(|tax| tax.len() == 1) + .map(|tax| tax[0]) + .collect(); + + // Sum up the weights of a taxon and sort by weight + log("Started summing the weights of a taxon and sorting them by weight..."); + let log_weights: Vec = weights.iter().map(|w| (w + 1.0).log10()).collect(); + + // Since large proteomes tend to have more detectable peptides, + // we adjust the weight by dividing by the size of the proteome i.e., + // the number of proteins that are associated with a taxon + let scaled_weight = log_weights.clone(); + + let mut tax_id_weights: HashMap = HashMap::new(); + for (ids, weight) in higher_taxa.clone().into_iter().zip(scaled_weight.clone().into_iter()) { + for id in ids { + *tax_id_weights.entry(id).or_insert(0.0) += weight; + } + } + let mut sorted_tax_id_weights: Vec<(usize, f32)> = tax_id_weights.into_iter().collect(); + sorted_tax_id_weights.sort_by(|a, b| b.1.partial_cmp(&a.1).expect("Partial compare returned None")); + let (tax_ids, tax_id_weights): (Vec, Vec) = sorted_tax_id_weights.into_iter().unzip(); + + // Retrieves the specified taxonomic rank taxid in the lineage of each of the species-level taxids returned by + // Unipept for both the UnipeptFrame and the TaxIdWeightFrame + let higher_unique_psm_taxids = unique_psm_taxa; + + // Group the duplicate entries of higher up taxa and sum their weights + let higher_taxid_weights = tax_id_weights; + + let higher_taxid_unique: Vec = tax_ids.iter().map(|id| 
higher_unique_psm_taxids.contains(id)).collect(); + + // TODO: Why hardcoded < 50 + let sequence_csv = if higher_taxid_weights.len() < 50 { + generate_sequence_csv(None, false, sequences, pep_scores, pep_psm_counts, higher_taxa, weights, log_weights)? + } else { + let mut taxa_to_include: HashSet = tax_ids.iter().take(max_taxa).cloned().collect(); + taxa_to_include.extend(higher_unique_psm_taxids); + + generate_sequence_csv(Some(taxa_to_include), true, sequences, pep_scores, pep_psm_counts, higher_taxa, weights, log_weights)? + }; + + let taxa_weights_csv = generate_taxa_weights_csv(tax_ids, higher_taxid_weights, higher_taxid_unique)?; + + Ok((sequence_csv, taxa_weights_csv)) +} + +/// Generates a CSV for sequences with associated taxonomic weights and scores. +/// +/// # Arguments +/// +/// * `taxa_to_include` - Optional set of taxa IDs to filter the output. +/// * `filter_taxa` - Whether to filter sequences based on `taxa_to_include`. +/// * `sequences` - List of peptide sequences. +/// * `scores` - List of peptide scores corresponding to `sequences`. +/// * `psms` - List of PSM counts corresponding to `sequences`. +/// * `higher_taxa` - List of lists of higher taxa IDs for each sequence. +/// * `weights` - Computed weights for each sequence. +/// * `log_weights` - Log-transformed weights for each sequence. +/// +/// # Returns +/// +/// CSV string containing one row per peptide-taxon pair with columns: +/// "id", "sequence", "score", "psms", "higher_taxa", "weight", "log_weight". +#[allow(clippy::too_many_arguments)] +fn generate_sequence_csv(taxa_to_include: Option>, filter_taxa: bool, sequences: Vec, scores: Vec, psms: Vec, higher_taxa: Vec>, weights: Vec, log_weights: Vec) -> Result> { + + let mut wtr = Writer::from_writer(vec![]); + + let _ = wtr.write_record(["id", "sequence", "score", "psms", "higher_taxa", "weight", "log_weight"]); + + let mut id = 0; + for i in 0..sequences.len() { + for taxon in &higher_taxa[i] { + if (! 
filter_taxa) || taxa_to_include.as_ref().ok_or("No taxa to include passed while filter_taxa enabled")?.contains(taxon) { + wtr.write_record(&[ + id.to_string(), + sequences[i].clone(), + scores[i].to_string(), + psms[i].to_string(), + taxon.to_string(), + weights[i].to_string(), + log_weights[i].to_string() + ])?; + id += 1; + } + } + } + + let csv: String = String::from_utf8(wtr.into_inner()?)?; + + Ok(csv) +} + +/// Generates a CSV of taxa weights. +/// +/// # Arguments +/// +/// * `higher_taxa` - List of taxon IDs. +/// * `higher_taxid_weights` - List of computed weights corresponding to `higher_taxa`. +/// * `higher_taxid_unique` - List indicating whether each taxon is uniquely associated with a peptide. +/// +/// # Returns +/// +/// CSV string with columns: "id", "higher_taxa", "scaled_weight", "unique". +fn generate_taxa_weights_csv(higher_taxa: Vec, higher_taxid_weights: Vec, higher_taxid_unique: Vec) -> Result> { + let mut wtr = Writer::from_writer(vec![]); + + let _ = wtr.write_record(["id", "higher_taxa", "scaled_weight", "unique"]); + + for i in 0..higher_taxa.len() { + wtr.write_record(&[ + i.to_string(), + higher_taxa[i].to_string(), + higher_taxid_weights[i].to_string(), + higher_taxid_unique[i].to_string() + ])?; + } + + let csv: String = String::from_utf8(wtr.into_inner()?)?; + + Ok(csv) + +} + +/// Maps taxa lists onto the taxonomic rank specified by the user. +/// +/// # Arguments +/// +/// * `taxa` - Mutable reference to a vector of vectors of taxon IDs. +/// * `taxa_rank` - The desired taxonomic rank to normalize to (e.g., "species"). +pub(crate) async fn normalize_unipept_responses(taxa: &mut [Vec], taxa_rank: &str) -> Result<(), Box> { + + // TODO: should we first do get_lineages_for_taxa to limit Unipept calls (see python)? 
+ let mut lineage_cache: HashMap>> = HashMap::new(); + + // Map all taxa onto the rank specified by the user + for taxon in taxa { + *taxon = get_unique_lineage_at_specified_rank_async(taxon, taxa_rank, &mut lineage_cache) + .await + .map_err(|e| -> Box { e })?; + } + + Ok(()) +} + +/// Selects `n` random indices from `taxa` vectors, weighted by inverse degeneracy. +/// +/// # Arguments +/// +/// * `taxa` - Vector of vectors of taxon IDs for each peptide. +/// * `n` - Number of random samples to select. +/// +/// # Returns +/// +/// A `HashSet` of selected indices, chosen with probability proportional to +/// `1 / number_of_taxa_per_peptide`. +fn weighted_random_sample(taxa: &[Vec], n: usize) -> Result, Box> { + + // Calculate normalized weights based on the length of the taxa array + let weights: Vec = taxa.iter().map(|taxon| if taxon.is_empty() { 0.0 } else { 1.0 / taxon.len() as f64 }).collect(); + let total_weight: f64 = weights.iter().sum(); + let normalized_weights: Vec = weights.iter().map(|w| w / total_weight).collect(); + + let samples: HashSet = select_random_samples_with_weights(normalized_weights, n)?; + + Ok(samples) +} + + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::HashSet; + + #[tokio::test] + async fn test_perform_taxa_weighing_basic() { + let pep_taxa_json = r#"{"PEP1":[3000],"PEP2":[3500]}"#.to_string(); + let pep_scores_json = r#"{"PEP1":0.8,"PEP2":0.5}"#.to_string(); + let pep_psm_counts_json = r#"{"PEP1":4,"PEP2":2}"#.to_string(); + let max_taxa = 10; + let taxa_rank = None; + + let csvs = perform_taxa_weighing( + pep_taxa_json, + pep_scores_json, + pep_psm_counts_json, + max_taxa, + taxa_rank + ) + .await; + assert!(csvs.is_ok()); + let (seq_csv, taxa_csv) = csvs.unwrap(); + + assert!(seq_csv.contains("sequence")); + assert!(seq_csv.contains("PEP1")); + assert!(taxa_csv.contains("higher_taxa")); + } + + #[test] + fn test_generate_sequence_csv_basic() { + let sequences = vec!["PEP1".to_string(), "PEP2".to_string()]; + 
let scores = vec![0.8, 0.5]; + let psms = vec![4, 2]; + let higher_taxa = vec![vec![3000], vec![3001]]; + let weights = vec![0.5, 0.2]; + let log_weights = vec![0.18, 0.079]; + + let csv = generate_sequence_csv(None, false, sequences, scores, psms, higher_taxa, weights, log_weights); + assert!(csv.is_ok()); + let csv = csv.unwrap(); + assert!(csv.contains("sequence")); + assert!(csv.contains("PEP1")); + assert!(csv.contains("1")); + } + + #[test] + fn test_generate_sequence_csv_with_filter() { + let sequences = vec!["PEP".to_string()]; + let scores = vec![0.9]; + let psms = vec![2]; + let higher_taxa = vec![vec![10,11,12]]; + let weights = vec![0.2]; + let log_weights = vec![0.042]; + let filter_taxa: HashSet = vec![11,12].into_iter().collect(); + + let csv = generate_sequence_csv(Some(filter_taxa), true, sequences, scores, psms, higher_taxa, weights, log_weights); + assert!(csv.is_ok()); + let csv = csv.unwrap(); + assert!(csv.contains("12")); + assert!(!csv.contains("10")); + } + + #[test] + fn test_generate_taxa_weights_csv_basic() { + let higher_taxa = vec![1,2]; + let weights = vec![0.5, 0.8]; + let unique_flags = vec![true, false]; + + let csv = generate_taxa_weights_csv(higher_taxa, weights, unique_flags); + assert!(csv.is_ok()); + let csv = csv.unwrap(); + assert!(csv.contains("higher_taxa")); + assert!(csv.contains("0")); + assert!(csv.contains("true")); + } + + #[tokio::test] + async fn test_normalize_unipept_responses_basic() { + let mut taxa = vec![vec![3000]]; + let _ = normalize_unipept_responses(&mut taxa, "species").await; + assert!(taxa.iter().all(|v| !v.is_empty())); + } + + #[test] + fn test_weighted_random_sample_basic() { + let taxa = vec![vec![1], vec![2,3], vec![4]]; + let n = 2; + let samples = weighted_random_sample(&taxa, n); + assert!(samples.is_ok()); + let samples = samples.unwrap(); + + assert_eq!(samples.len(), n); + assert!(samples.iter().all(|&idx| idx < taxa.len())); + } +} diff --git 
a/peptonizer_rust/src/zero_lookahead_belief_propagation.rs b/peptonizer_rust/src/zero_lookahead_belief_propagation.rs new file mode 100644 index 0000000..5da8076 --- /dev/null +++ b/peptonizer_rust/src/zero_lookahead_belief_propagation.rs @@ -0,0 +1,45 @@ +use std::collections::HashMap; +use nori::zero_lookahead_bp_from_graph_bytes; + + +/// Runs belief propagation on a factor graph provided as a GraphML string. +/// +/// This function constructs the factor graph, fills in factor tables and priors, +/// splits the graph into connected components, and performs loopy belief propagation +/// on each component. The result is returned as a CSV string. +/// +/// # Arguments +/// +/// * `graph` - GraphML representation of the factor graph. +/// * `alpha` - Noisy-OR factor alpha parameter. +/// * `beta` - Noisy-OR factor beta parameter. +/// * `regularized` - Whether to regularize factor tables to penalize large numbers of parents. +/// * `prior` - Prior belief for taxon nodes. +/// * `max_iter` - Maximum number of belief propagation iterations. +/// * `tol` - Tolerance threshold for message convergence. +/// +/// # Returns +/// +/// CSV string with one row per node containing columns: +/// `[node_name, posterior_probability_1, node_category]` +pub fn run_belief_propagation( + graph_bytes: &[u8], + alpha: f32, + beta: f32, + regularized: bool, + prior: f32, + max_iter: Option, + tol: Option +) -> Result> { + let results = zero_lookahead_bp_from_graph_bytes(graph_bytes, alpha, beta, regularized, prior, max_iter, tol).unwrap(); + + let taxon_score_dict: HashMap = results + .into_iter() + .filter_map(|(key, values)| { + values.get(1).map(|&v| (key, v)) + }) + .collect(); + + Ok(serde_json::to_string(&taxon_score_dict)?) 
+} + diff --git a/peptonizer_ts/package.json b/peptonizer_ts/package.json index 2238ec2..8571ad2 100644 --- a/peptonizer_ts/package.json +++ b/peptonizer_ts/package.json @@ -1,7 +1,7 @@ { "name": "peptonizer", "private": true, - "version": "0.0.31", + "version": "0.1.11", "type": "module", "types": "dist/types.d.ts", "files": [ @@ -9,18 +9,18 @@ ], "module": "./dist/peptonizer.js", "scripts": { - "dev": "vite", + "dev": "vite --host", "build": "tsc && vite build", "preview": "vite preview" }, "devDependencies": { "typescript": "^5.8.2", - "vite": "^6.2.0" + "vite": "^6.2.0", + "vite-plugin-wasm": "^3.6.0" }, "dependencies": { "@types/async": "^3.2.24", "async": "^3.2.6", - "pyodide": "0.26.4", "rollup-plugin-dts": "^6.1.1", "vite-plugin-dts": "^4.5.3" } diff --git a/peptonizer_ts/peptonizer-0.1.11.tgz b/peptonizer_ts/peptonizer-0.1.11.tgz new file mode 100644 index 0000000..11d93b0 Binary files /dev/null and b/peptonizer_ts/peptonizer-0.1.11.tgz differ diff --git a/peptonizer_ts/peptonizer-v0.0.26.tgz b/peptonizer_ts/peptonizer-v0.0.26.tgz new file mode 100644 index 0000000..9344ad6 --- /dev/null +++ b/peptonizer_ts/peptonizer-v0.0.26.tgz @@ -0,0 +1,1993 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Peptonizer2000/peptonizer_ts/peptonizer-v0.0.26.tgz at master · compomics/Peptonizer2000 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ Skip to content + + + + + + + + + + + + +
+
+ + + + + + + + + + + + + +
+ +
+ + + + + + + + +
+ + + + + +
+ + + + + + + + + + + +
+
+
+ + + + + + + + + + + + + + + + + + +
+ Open in github.dev + Open in a new github.dev tab + Open in codespace + + + + + + + + + + + + + + + + + + +

Files

Latest commit

 

History

History
159 KB

peptonizer-v0.0.26.tgz

File metadata and controls

159 KB
+
+ + + + +
+ +
+ +
+
+ +
+ +
+

Footer

+ + + + +
+
+ + + + + © 2025 GitHub, Inc. + +
+ + +
+
+ + + + + + + + + + + + + + + + + + + + +
+ +
+
+ + + diff --git a/peptonizer_ts/src/Peptonizer.ts b/peptonizer_ts/src/Peptonizer.ts index bd5eb76..adcb1e6 100644 --- a/peptonizer_ts/src/Peptonizer.ts +++ b/peptonizer_ts/src/Peptonizer.ts @@ -23,7 +23,6 @@ class Peptonizer { * detecting a peptide at random. * @param priors An array of possible values for the gamma (or prior) parameter. Gamma indicates the prior probability * of a taxon being present. - * @param rank At which NCBI taxonomic rank should the Peptonizer perform the taxonomic inference. * @param taxaInGraph How many taxa are being used in the graphical model? * @param progressListener Is called everytime the progress of the belief propagation algorithm has been updated. * @param workers The amount of Web Workers that can be spawned and used simultaneously to run the Peptonizer. @@ -37,7 +36,6 @@ class Peptonizer { alphas: number[], betas: number[], priors: number[], - rank: string = "species", taxaInGraph: number = 100, progressListener?: PeptonizerProgressListener, workers: number = 2 @@ -67,7 +65,6 @@ class Peptonizer { peptidesTaxa, peptidesScores, peptidesCounts, - rank, taxaInGraph ); @@ -76,7 +73,7 @@ class Peptonizer { } const [sequenceScoresCsv, taxonWeightsCsv] = taxonWeighingResult; - const generatedGraph = await this.workerPool.generateGraph(sequenceScoresCsv); + const factor_graph_bytes = await this.workerPool.generateGraph(sequenceScoresCsv); const pepgmPromises: Promise[] = []; @@ -84,14 +81,16 @@ class Peptonizer { return; } + console.time("Execution time pepgm total"); for (const paramSet of parameterSets) { pepgmPromises.push( - this.workerPool.executePepgm(generatedGraph, paramSet.alpha, paramSet.beta, paramSet.prior, progressListener) + this.workerPool.executePepgm(factor_graph_bytes, paramSet.alpha, paramSet.beta, paramSet.prior, progressListener) ); } // Wait until all parameter sets have been tuned... 
const peptonizerResults = await Promise.all(pepgmPromises); + console.timeEnd("Execution time pepgm total"); // Now that we have all the results generated by the peptonizer, we need to figure out which one yields the best // results @@ -101,7 +100,7 @@ class Peptonizer { } // First compute the clustered taxa weights - const clusteredTaxaWeightsCsv = await this.workerPool.clusterTaxa(generatedGraph, taxonWeightsCsv); + const clusteredTaxaWeightsCsv = await this.workerPool.clusterTaxa(sequenceScoresCsv, taxonWeightsCsv); if (this.isCancelled) { return; diff --git a/peptonizer_ts/src/main.ts b/peptonizer_ts/src/main.ts index 4beffb4..dd44496 100644 --- a/peptonizer_ts/src/main.ts +++ b/peptonizer_ts/src/main.ts @@ -4,6 +4,7 @@ import peptonizerLogo from "./peptonizer.jpg" import {PeptonizerParameterSet, PeptonizerProgressListener} from "./PeptonizerProgressListener.ts"; import { Peptonizer } from "./Peptonizer.ts"; import {PeptonizerInputParser} from "./PeptonizerInputParser.ts"; +import {WorkerPool} from "./workers/WorkerPool.ts"; document.querySelector('#app')!.innerHTML= `
@@ -179,16 +180,25 @@ const startToPeptonize = async function() { const [peptidesScores, peptidesCounts] = PeptonizerInputParser.parse(fileContents); try { + + let workerPool = new WorkerPool(1); + const rank: string = "species"; + const taxonQuery: number[] = [2, 3]; + const peptidesTaxaString = await workerPool.fetchUnipeptTaxonInfo(peptidesScores, rank, taxonQuery); + + const peptidesTaxaJson = JSON.parse(peptidesTaxaString); + const peptidesTaxa: Map = new Map(Object.entries(peptidesTaxaJson)); + const peptonizerResult = await peptonizer.peptonize( - new Map(), + peptidesTaxa, peptidesScores, peptidesCounts, alphas, betas, priors, - "species", 50, - new ProgressListener(document.getElementById("progress-view")!, 2) + new ProgressListener(document.getElementById("progress-view")!, 2), + 1 // 1 worker for debuggin purposes ); diff --git a/peptonizer_ts/src/workers/PeptonizerWorker.ts b/peptonizer_ts/src/workers/PeptonizerWorker.ts index fbb84cf..ccea588 100644 --- a/peptonizer_ts/src/workers/PeptonizerWorker.ts +++ b/peptonizer_ts/src/workers/PeptonizerWorker.ts @@ -1,19 +1,18 @@ /** * This worker contains all instructions to run the different steps that are required by the Peptonizer. All of these * functions are implemented in the same Worker instance and are loaded at the same time into memory instead of into - * smaller separate workers. Since all of these workers require a running Pyodide instance, it's more efficient - * (memory-wise) to bundle all Python functionality into one bigger worker. + * smaller separate workers. 
* * @author Pieter Verschaffelt */ -import {loadPyodide, PyodideInterface} from 'pyodide'; -import peptonizerWhlBase64 from "./lib/peptonizer-0.1-py3-none-any.base64.whl?raw"; import { ClusterTaxaTaskData, ClusterTaxaTaskDataResult, ComputeGoodnessDataResult, ComputeGoodnessTaskData, ExecutePepgmTaskData, ExecutePepgmTaskDataResult, + FetchUnipeptTaxonTaskData, + FetchUnipeptTaxonTaskResult, GenerateGraphTaskData, GenerateGraphTaskDataResult, InputEventData, @@ -23,119 +22,96 @@ import { ResultType, WorkerTask } from "./PeptonizerWorkerTypes.ts"; - -import performTaxaWeighingPythonCode from "./lib/perform_taxa_weighing.py?raw"; -import generateGraphPythonCode from "./lib/generate_pepgm_graph.py?raw"; -import executePepgmPythonCode from "./lib/execute_pepgm.py?raw"; -import clusterTaxaPythonCode from "./lib/cluster_taxa.py?raw"; -import computeGoodnessPythonCode from "./lib/compute_goodness.py?raw"; +import init, { + perform_taxa_weighing_wasm, + execute_pepgm_wasm, + fetch_unipept_taxa_wasm, + generate_pepgm_graph_wasm, + cluster_taxa_wasm, + compute_goodness_wasm +} from "../../pkg/peptonizer_rust.js"; interface DedicatedWorkerGlobalScope { - pyodide: PyodideInterface; postMessage: (message: OutputEventData) => void; submitPepgmProgress: (progressType: "graph" | "max_residual" | "iteration", currentValue: number, maxValue: number, workerId: number) => void; } declare const self: DedicatedWorkerGlobalScope & typeof globalThis; -async function loadPyodideAndPackages(): Promise { - self.pyodide = await loadPyodide({ - indexURL: 'https://cdn.jsdelivr.net/pyodide/v0.26.4/full/' - }); - // Load all packages into the Pyodide runtime environment that are required by the Peptonizer - await self.pyodide.loadPackage([ - 'numpy', - 'scipy', - 'networkx', - 'pandas', - 'micropip', - 'requests', - 'openssl' - ]); - // Use the imported .whl file URL directly with micropip - await self.pyodide.runPythonAsync(` - import base64 - from pathlib import Path - - import micropip - - 
await micropip.install('rbo') - - # Decode base64 string to binary and write to a temporary file - wheel_data = "${peptonizerWhlBase64}" - wheel_binary = base64.b64decode(wheel_data) - - # Define a temporary path for the .whl file - wheel_path = Path("/tmp/peptonizer-0.1-py3-none-any.whl") - wheel_path.write_bytes(wheel_binary) - - # Install the wheel package - await micropip.install("emfs:///tmp/peptonizer-0.1-py3-none-any.whl") - - # Clean up by deleting the temporary file - wheel_path.unlink() - `); +async function fetchUnipeptTaxonInformation(data: FetchUnipeptTaxonTaskData): Promise { + console.time("Execution time fetching Unipept information"); + + let score_keys = [...data.peptidesScores.keys()]; + let peptidesScores = JSON.stringify(score_keys); + let taxonQuery = JSON.stringify(data.taxonQuery); + + const unipeptJson = await fetch_unipept_taxa_wasm(peptidesScores, data.rank, taxonQuery); + + console.timeEnd("Execution time fetching Unipept information"); + + return { unipeptJson }; } async function performTaxaWeighing(data: PerformTaxaWeighingTaskData): Promise { - // Set inputs for the Python code - self.pyodide.globals.set('peptides_scores', data.peptidesScores); - self.pyodide.globals.set('peptides_counts', data.peptidesCounts); - self.pyodide.globals.set('rank', data.rank); - self.pyodide.globals.set('taxa_in_graph', data.taxaInGraph); - self.pyodide.globals.set('peptides_taxa', data.peptidesTaxa); + console.time("Execution time taxa weiging"); + + let peptidesTaxa = JSON.stringify(Object.fromEntries(data.peptidesTaxa)); + let peptidesScores = JSON.stringify(Object.fromEntries(data.peptidesScores)); + let peptidesCounts = JSON.stringify(Object.fromEntries(data.peptidesCounts)); - // Fetch the Python code and execute it with Pyodide - const [sequenceScoresCsv, taxaWeightsCsv] = await self.pyodide.runPythonAsync(performTaxaWeighingPythonCode); + const [sequenceScoresCsv, taxaWeightsCsv] = await perform_taxa_weighing_wasm(peptidesTaxa, peptidesScores, 
peptidesCounts, data.taxaInGraph); + console.timeEnd("Execution time taxa weiging"); return { sequenceScoresCsv, taxaWeightsCsv }; + + } async function generateGraph(data: GenerateGraphTaskData): Promise { - self.pyodide.globals.set('taxa_weights_csv', data.taxaWeightsCsv); + console.time("Execution time generating graph"); - const graphXml = await self.pyodide.runPythonAsync(generateGraphPythonCode); + const factor_graph_bytes = generate_pepgm_graph_wasm(data.sequenceScoresCsv); + + console.timeEnd("Execution time generating graph"); return { - graphXml + factor_graph_bytes }; } -async function executePepgm(data: ExecutePepgmTaskData, workerId: number): Promise { - self.pyodide.globals.set('graph', data.graphXml); - self.pyodide.globals.set('alpha', data.alpha); - self.pyodide.globals.set('beta', data.beta); - self.pyodide.globals.set('prior', data.prior); - self.pyodide.globals.set('worker_id', workerId); - const taxonScoresJson = await self.pyodide.runPythonAsync(executePepgmPythonCode); +async function executePepgm(data: ExecutePepgmTaskData): Promise { + console.time("Execution time Nori"); + + const taxonScoresJson = execute_pepgm_wasm(data.factor_graph_bytes, data.alpha, data.beta, true, data.prior); + console.timeEnd("Execution time Nori"); return { taxonScoresJson }; } async function clusterTaxa(data: ClusterTaxaTaskData): Promise { - self.pyodide.globals.set('graph', data.graphXml); - self.pyodide.globals.set('taxa_weights_csv', data.taxaWeightsCsv); - self.pyodide.globals.set('similarity_threshold', data.similarityThreshold); + console.time("Execution time clustering taxa"); - const clusteredTaxaWeightsCsv = await self.pyodide.runPythonAsync(clusterTaxaPythonCode); + const clusteredTaxaWeightsCsv = cluster_taxa_wasm(data.sequenceScoresCsv, data.taxaWeightsCsv, data.similarityThreshold) + console.timeEnd("Execution time clustering taxa"); return { clusteredTaxaWeightsCsv }; } async function computeGoodness(data: ComputeGoodnessTaskData): Promise { - 
self.pyodide.globals.set('clustered_taxa_weights_csv', data.clusteredTaxaWeightsCsv); - self.pyodide.globals.set('peptonizer_results', data.peptonizerResults); - - const goodness = await self.pyodide.runPythonAsync(computeGoodnessPythonCode); + console.time("Execution time computing goodness"); + + let peptonizerResults = JSON.stringify(Object.fromEntries(data.peptonizerResults)); + const goodness = compute_goodness_wasm(data.clusteredTaxaWeightsCsv, peptonizerResults); + console.timeEnd("Execution time computing goodness"); return { goodness } @@ -161,24 +137,25 @@ self.submitPepgmProgress = function( self.postMessage(resultMessage); } -let pyodideReadyPromise: Promise = loadPyodideAndPackages(); self.onmessage = async (event: MessageEvent): Promise => { try { // Make sure loading is done - await pyodideReadyPromise; + await init(); // Destructure the data from the event const eventData = event.data; - let output: PerformTaxaWeighingTaskResult | GenerateGraphTaskDataResult | ExecutePepgmTaskDataResult | ClusterTaxaTaskDataResult | ComputeGoodnessDataResult | undefined; + let output: FetchUnipeptTaxonTaskResult | PerformTaxaWeighingTaskResult | GenerateGraphTaskDataResult | ExecutePepgmTaskDataResult | ClusterTaxaTaskDataResult | ComputeGoodnessDataResult | undefined; - if (eventData.task === WorkerTask.PERFORM_TAXA_WEIGHING) { + if (eventData.task === WorkerTask.FETCH_UNIPEPT_TAXON) { + output = await fetchUnipeptTaxonInformation(eventData.input); + } else if (eventData.task === WorkerTask.PERFORM_TAXA_WEIGHING) { output = await performTaxaWeighing(eventData.input); } else if (eventData.task === WorkerTask.GENERATE_GRAPH) { output = await generateGraph(eventData.input); } else if (eventData.task === WorkerTask.EXECUTE_PEPGM) { - output = await executePepgm(eventData.input, eventData.workerId); + output = await executePepgm(eventData.input); } else if (eventData.task === WorkerTask.CLUSTER_TAXA) { output = await clusterTaxa(eventData.input); } else if 
(eventData.task === WorkerTask.COMPUTE_GOODNESS) { diff --git a/peptonizer_ts/src/workers/PeptonizerWorkerTypes.ts b/peptonizer_ts/src/workers/PeptonizerWorkerTypes.ts index 1317dc0..0304ef9 100644 --- a/peptonizer_ts/src/workers/PeptonizerWorkerTypes.ts +++ b/peptonizer_ts/src/workers/PeptonizerWorkerTypes.ts @@ -1,5 +1,6 @@ // Define a specific type of inputs that are expected for each task that can be performed by this worker. enum WorkerTask { + FETCH_UNIPEPT_TAXON, PERFORM_TAXA_WEIGHING, GENERATE_GRAPH, EXECUTE_PEPGM, @@ -7,27 +8,32 @@ enum WorkerTask { COMPUTE_GOODNESS } +interface FetchUnipeptTaxonTaskData { + peptidesScores: Map; + rank: string; + taxonQuery: number[]; +} + interface PerformTaxaWeighingTaskData { peptidesTaxa: Map; peptidesScores: Map; peptidesCounts: Map; - rank: string; taxaInGraph: number; } interface GenerateGraphTaskData { - taxaWeightsCsv: string; + sequenceScoresCsv: string; } interface ExecutePepgmTaskData { - graphXml: string, + factor_graph_bytes: Uint8Array, alpha: number, beta: number, prior: number, } interface ClusterTaxaTaskData { - graphXml: string, + sequenceScoresCsv: string, taxaWeightsCsv: string, similarityThreshold: number } @@ -38,6 +44,7 @@ interface ComputeGoodnessTaskData { } type SpecificInputEventData = + { task: WorkerTask.FETCH_UNIPEPT_TAXON, input: FetchUnipeptTaxonTaskData} | { task: WorkerTask.PERFORM_TAXA_WEIGHING, input: PerformTaxaWeighingTaskData } | { task: WorkerTask.GENERATE_GRAPH, input: GenerateGraphTaskData } | { task: WorkerTask.EXECUTE_PEPGM, input: ExecutePepgmTaskData } | @@ -48,13 +55,17 @@ type CommonInputEventData = { workerId: number }; type InputEventData = SpecificInputEventData & CommonInputEventData; +interface FetchUnipeptTaxonTaskResult { + unipeptJson: string, +} + interface PerformTaxaWeighingTaskResult { sequenceScoresCsv: string, taxaWeightsCsv: string } interface GenerateGraphTaskDataResult { - graphXml: string + factor_graph_bytes: Uint8Array } interface 
ExecutePepgmTaskDataResult { @@ -83,6 +94,7 @@ enum ResultType { } type SpecificOutputEventData = { resultType: ResultType.SUCCESSFUL } & ( + { task: WorkerTask.FETCH_UNIPEPT_TAXON, output: FetchUnipeptTaxonTaskResult } | { task: WorkerTask.PERFORM_TAXA_WEIGHING, output: PerformTaxaWeighingTaskResult } | { task: WorkerTask.GENERATE_GRAPH, output: GenerateGraphTaskDataResult } | { task: WorkerTask.EXECUTE_PEPGM, output: ExecutePepgmTaskDataResult } | @@ -102,6 +114,7 @@ export { }; export type { + FetchUnipeptTaxonTaskData, PerformTaxaWeighingTaskData, GenerateGraphTaskData, ExecutePepgmTaskData, @@ -109,6 +122,7 @@ export type { ComputeGoodnessTaskData, SpecificInputEventData, InputEventData, + FetchUnipeptTaxonTaskResult, PerformTaxaWeighingTaskResult, GenerateGraphTaskDataResult, ExecutePepgmTaskDataResult, diff --git a/peptonizer_ts/src/workers/WorkerPool.ts b/peptonizer_ts/src/workers/WorkerPool.ts index 883a723..dcc5bb6 100644 --- a/peptonizer_ts/src/workers/WorkerPool.ts +++ b/peptonizer_ts/src/workers/WorkerPool.ts @@ -3,6 +3,7 @@ import { ClusterTaxaTaskData, ComputeGoodnessTaskData, ExecutePepgmTaskData, + FetchUnipeptTaxonTaskData, GenerateGraphTaskData, InputEventData, OutputEventData, @@ -81,6 +82,16 @@ class WorkerPool { }, workerCount); } + public async fetchUnipeptTaxonInfo(peptidesScores: Map, rank: string, taxonQuery: number[]): Promise { + const eventData: FetchUnipeptTaxonTaskData = { + peptidesScores, + rank, + taxonQuery + }; + + return await this.queue.pushAsync({ queueInput: { task: WorkerTask.FETCH_UNIPEPT_TAXON, input: eventData }, progressListener: undefined }); + } + /** * Generates a CSV-file representing a dataframe with all the taxa weights required for the Peptonizer. These * taxa weights will be used in a subsequent step of the Peptonizer to generate the factor graph. 
@@ -90,7 +101,6 @@ class WorkerPool { * @param peptidesScores Mapping between peptide sequences that need to be considered by the peptonizer and a * scoring value assigned to each sequence by prior steps (e.g. search engines). * @param peptidesCounts Mapping between peptide sequences and their occurrences in the input file. - * @param rank At which NCBI taxonomic rank should the Peptonizer perform the taxonomic inference? * @param taxaInGraph How many taxa are being used in the graphical model? * @return A CSV-representation of a dataframe with taxon weights. */ @@ -98,7 +108,6 @@ class WorkerPool { peptidesTaxa: Map, peptidesScores: Map, peptidesCounts: Map, - rank: string, taxaInGraph: number, ): Promise<[string, string]> { if (this.isCancelled) { @@ -109,7 +118,6 @@ class WorkerPool { peptidesTaxa, peptidesScores, peptidesCounts, - rank, taxaInGraph }; @@ -117,21 +125,21 @@ class WorkerPool { } public async generateGraph( - taxaWeightsCsv: string - ): Promise { + sequenceScoresCsv: string + ): Promise { if (this.isCancelled) { throw new Error("Workerpool is no longer active. 
Cancel has been called on this pool before."); } const eventData: GenerateGraphTaskData = { - taxaWeightsCsv + sequenceScoresCsv }; return await this.queue.pushAsync({ queueInput: { task: WorkerTask.GENERATE_GRAPH, input: eventData }, progressListener: undefined }); } public async executePepgm( - graphXml: string, + factor_graph_bytes: Uint8Array, alpha: number, beta: number, prior: number, @@ -142,7 +150,7 @@ class WorkerPool { } const eventData: ExecutePepgmTaskData = { - graphXml, + factor_graph_bytes, alpha, beta, prior @@ -152,7 +160,7 @@ class WorkerPool { } public async clusterTaxa( - graphXml: string, + sequenceScoresCsv: string, taxaWeightsCsv: string, similarityThreshold: number = 0.9 ): Promise { @@ -161,7 +169,7 @@ class WorkerPool { } const eventData: ClusterTaxaTaskData = { - graphXml, + sequenceScoresCsv, taxaWeightsCsv, similarityThreshold } @@ -214,10 +222,12 @@ class WorkerPool { const eventData = event.data; if (eventData.resultType === ResultType.SUCCESSFUL) { - if (eventData.task === WorkerTask.PERFORM_TAXA_WEIGHING) { + if (eventData.task === WorkerTask.FETCH_UNIPEPT_TAXON) { + resolve(eventData.output.unipeptJson) + } else if (eventData.task === WorkerTask.PERFORM_TAXA_WEIGHING) { resolve([eventData.output.sequenceScoresCsv, eventData.output.taxaWeightsCsv]); } else if (eventData.task === WorkerTask.GENERATE_GRAPH) { - resolve(eventData.output.graphXml); + resolve(eventData.output.factor_graph_bytes); } else if (eventData.task === WorkerTask.EXECUTE_PEPGM) { const peptonizerResult: PeptonizerResult = new Map(); for (const [key, value] of Object.entries(JSON.parse(eventData.output.taxonScoresJson))) { diff --git a/peptonizer_ts/src/workers/lib/cluster_taxa.py b/peptonizer_ts/src/workers/lib/cluster_taxa.py deleted file mode 100644 index c3a4eb2..0000000 --- a/peptonizer_ts/src/workers/lib/cluster_taxa.py +++ /dev/null @@ -1,21 +0,0 @@ -from io import StringIO - -import peptonizer -import networkx as nx -import pandas as pd - - -graph_xml = 
globals().get('graph') -taxa_weights_csv = globals().get('taxa_weights_csv') -similarity_threshold = globals().get('similarity_threshold') - -clustered_taxa_df = peptonizer.cluster_taxa_based_on_similarity( - nx.read_graphml(StringIO(graph_xml)), - pd.read_csv(StringIO(taxa_weights_csv)), - similarity_threshold -) - -print("Successfully generated clustered taxa!") - -# Return a CSV version of the clustered taxa dataframe -clustered_taxa_df.to_csv() diff --git a/peptonizer_ts/src/workers/lib/compute_goodness.py b/peptonizer_ts/src/workers/lib/compute_goodness.py deleted file mode 100644 index f1b418d..0000000 --- a/peptonizer_ts/src/workers/lib/compute_goodness.py +++ /dev/null @@ -1,9 +0,0 @@ -import pandas as pd -import peptonizer - -from io import StringIO - -clusteredTaxaWeightsCsv = globals().get('clustered_taxa_weights_csv') -peptonizer_results = globals().get('peptonizer_results') - -peptonizer.compute_goodness(peptonizer_results, pd.read_csv(StringIO(clusteredTaxaWeightsCsv))) diff --git a/peptonizer_ts/src/workers/lib/execute_pepgm.py b/peptonizer_ts/src/workers/lib/execute_pepgm.py deleted file mode 100644 index c14c19a..0000000 --- a/peptonizer_ts/src/workers/lib/execute_pepgm.py +++ /dev/null @@ -1,48 +0,0 @@ -import json -import peptonizer -# Provided by Pyodide, required to send status updates from this thread to the main thread in JavaScript. 
-import js - - -class JSZeroLookaheadProgressListener(peptonizer.ZeroLookaheadProgressListener): - def __init__(self, execution_id: int): - self.execution_id = execution_id - - def graphs_updated( - self, - current_graph: int, - total_graphs: int - ): - js.submitPepgmProgress("graph", current_graph, total_graphs, self.execution_id) - - def max_residual_updated( - self, - max_residual: float, - tolerance: float, - ): - js.submitPepgmProgress("max_residual", max_residual, tolerance, self.execution_id) - - def iterations_updated( - self, - current_iterations: int, - total_iterations: int - ): - js.submitPepgmProgress("iteration", current_iterations, total_iterations, self.execution_id) - -graph = globals().get('graph') -alpha = globals().get('alpha') -beta = globals().get('beta') -prior = globals().get('prior') -execution_id = globals().get('worker_id') - -pepgm_results = peptonizer.run_belief_propagation( - graph, - alpha, - beta, - True, - prior, - progress_listener=JSZeroLookaheadProgressListener(execution_id) -) - -# Now convert the results from PepGM into a list of taxon IDs and the corresponding score values. 
-json.dumps(peptonizer.parse_taxon_scores(pepgm_results)) diff --git a/peptonizer_ts/src/workers/lib/generate_pepgm_graph.py b/peptonizer_ts/src/workers/lib/generate_pepgm_graph.py deleted file mode 100644 index d90f529..0000000 --- a/peptonizer_ts/src/workers/lib/generate_pepgm_graph.py +++ /dev/null @@ -1,13 +0,0 @@ -from io import StringIO - -import peptonizer -import pandas as pd - -# The PSM input should be provided to the parser as a list of strings -taxa_weights_csv = globals().get('taxa_weights_csv') - -# Finally use the computed weights to generate the graph -pepgm_graph = peptonizer.generate_pepgm_graph(pd.read_csv(StringIO(taxa_weights_csv), dtype={"HigherTaxa": "Int64"})) - -# Return an XML representation of the generated peptonizer graph -pepgm_graph.to_graph_ml() diff --git a/peptonizer_ts/src/workers/lib/peptonizer-0.1-py3-none-any.base64.whl b/peptonizer_ts/src/workers/lib/peptonizer-0.1-py3-none-any.base64.whl deleted file mode 100644 index 1138a55..0000000 --- a/peptonizer_ts/src/workers/lib/peptonizer-0.1-py3-none-any.base64.whl +++ /dev/null @@ -1 +0,0 @@ 
-UEsDBBQAAAAIAG90WlrGZLPnQgEAALUCAAAWAAAAcGVwdG9uaXplci9fX2luaXRfXy5weX1RQU4DMQy89xX7gIo/ACdEVSEqLlwsN+vNRk3iyMmWltfj3SaFShW3xOMZe8aDcOgevkkYPPMBR8Ie9uQdDZCEE1osjmPnQmIpnUzxDrruPlVg0/hvwlYo543LhSLJaliGJM8FtD75kpveUkuUCkenSzS4MVAyyW/z/F26XU9Q8rG2DVTM2OoZMPYwOF9UruAJG/v/rir1Rc6O5YaXSAaWsNRgwV20bTKawgJWMI1gZ683adXKsrMNl7bKpFMRJc+qHCEbVueNdg9bd8YTRjD5uK5B/IWrKkb0Z4WsuB4yoZjxat/F+bBZ40bBQGpcRd/ae0dFR3BIk65rmfuoB6yqi3PjJ72mqPemWCuXYPaYqYd5HxecR3HlXNlTdHPooOJB3wY1sSbxccGe/0CVFc3egWA8XFPZPj+9wPvj9nW3+gFQSwMEFAAAAAgAdUVjWu7bULbHAwAAUwkAACEAAABwZXB0b25pemVyL2FuYWx5c2VfZ3JpZF9zZWFyY2gucHmNVdtu4zYQfddXDNIHS4FWbfbRQB7SzbZdoCiCJG+GIVDiyCIiiwRJxev9+g5vspS9IEZgxdRczpw5MxRHJbUFxUbODNCf4pkIZ7qRWdZpeQTTCnWujGXWQHyJo9VSncN7e1ZiPKRX/wpjS/iPHZE/T2rAEvwjy7J2YMbAA9P0zqJ+QptfzIptBvS5urryz0dUGg2lIVRg0ILsCGX0NGB7ZqFnrwgN4giTQQ6d1GT7gMrKUXxD+jGy4WwEWUuwJ2Qv5OYcyE/ISbuQ4WAQ2PmsiopiB2aFHIENB6mF7Y/VChgbVM+20A2SWX/QoF39VlpInQ6yjGMHrTyqyWJ9kJKPaExu2VdWm1ZSiVvgoiXC6Ejw+oTi0Fs6VLy6Z5b95SqO1PwGT45fB3nh790JL9NnaM7gD0GMwNG0OHLXGKk5ah/CUADkNbnXghu4jQdLPJWweDR5UcILnm8Hdmw4A3e29d+7m30JGl+pCXj7rCcssgju81erWRvwhbAOJny5pw6O3B0LTUxoyqFkABYyLpEFVLsA0Hc0/FteClvXsF96R0YoQLB+l3+E/yn0KCk7iUNJQ4KjjjptNKwRg7Bn4txYLZrJEe/9Y4d5ndxvU6B8ha14m87l0Gx8+dAwp2EjjmJg2uUgXZ2ctoMkPlwoZZ5PHy+dfrmnnphpsIFMjXbSo5vg6pFiE9VPc9zcW7jPSnG7zT/0RP1M8Tf76pUNE5pytt2J0eaBsmJB64LPuRdFRXnzAq4hv4Hfv2fm+ho+FnEwOjHyukFj68ts57GSrd8kO78cdmFGlqtjv3/PzKSp/Vu8EpWObE4WnbP4bonoyVloOR16b/mn3wvwsNgJd2knkEfn9BSVDUdm295Jeq4j6BKtKRMFtJzSAgAy0aL1TCJr+6g2s4LnYodOGp9jWX3AHphC3way8Cl710ZjL6l8K6ugu62HBzPDdzAQxz67o9kEML0c/HjaE+1NV5XZzkK4qcjJdYOqGi0TozdMogxDTltetoI5YHEiWUewHcHBfF66P164c7KPFTy7GUyFh1pi406kVgybn0/6PXGDcHu6EOKF4nAveXmjp7tLN35R7XqlbUzLBoIUg2xKd6mmGXALmpzCnASL2kejHaPW14wH5FdZsE7trH3D/LE/j7OYtncZHGtXIIkiNXpmdBWmYkrRFZH/6np6M2RFMYcKCFOMOW3ccEHgpp7r+CZUvsoeoZpiucGDW/Cib6EXl9Q65HwLkcHqHnoMy++HE7MxS3GeSBTeLs3M3Kc3c1ott+pPke7+2NPtmP0PUEsDBBQAAAAIAHtsXlkMlcau+wAAAF4CAAAZAAAAcGVwdG9uaXplci9hcnJheV91dGlscy5wea2RTUsEMQyG7/0VAS/uYUZF8bCoIKhH8b4sS3emswTapqQd3frrTae
juyAeRHtK8/HmSYIuECfwowsZdAQfFB652pQD+l2NJKUGJgexQ4nEYDrUFuZ0S7s4OrMPSqneDOCJnbb4bk41s87LUt8+P9yXz8qHdrCk0/XVegHN3U+xpQJ5bNLIHiYZOJPcVhpV1YU0O/lqpROSBxoKCwSmrd6ixYQmTkTi3fwfVeJcjfIy3BYumb7qQnPYx0wqqCXT7DsTEjwVLVnsC6FPj8zEB7HA4vuc71cdjraV5zPoV8J+M/resPC//XnsqX5VEW7gwjSX52tBm4xv11IfUEsDBBQAAAAIAHxJYVk+QFO0tgYAAHwYAAAeAAAAcGVwdG9uaXplci9jb252b2x1dGlvbl90cmVlLnB55VhLb+M2EL77V8wih8hZr5wEix5ctOgjxyB72L0ZhkBJtM2uJAokZddFf3xnSD2olzcboL1UBwOm5v34OCORl1IZKKq8vADTUJQL4R2F5lKK4uDemOZVzsxxsVjslcwhhPqQKcUuUWVEpt2bmrV+/Sy0WcGn0ghZsMxR6ESgCi0OeNLQ7fcmkcVJZieOKm7oOOM5LwwjTpB7MEcOjqSyR0ZxDixJpEpJn5GguWLFAZkDzfkGjsaUerNex8LEVfKVm1Cqw1oqR7b2ZJGotVbJOmfacLX+XGYikXup8s9kkArLyxLFFtLAWQljeAHxBXL+7t0iyZjW8PuXF5nyzQLwSfkeokgUwkQRGpLtV/CHFIWJWCxPaBYGNHx5+pXCti3KcJ9JZn74uFs6bnpIk8pZJv7igEbAQXGGZkGpeCI02ttSkvjwNdLhJz9RYSs/8JiXi77cjO9NVDKFSdi0Gdw6V0ngiyx4n0OJw/HbLAMt4ivPxFHKNIp5Js8e34wvfUE3UGIGqAJyfeAaUnkuNsBSWxQnpgSLM64t6S+aqinJuTnKtM1VYqMbJbLCQBRoaJAd9QZundW3K1D+3yV8+Ln946fMiQEGVhCQIDgLc3TJx9zJ2CbTnCW4CGmwYQ9O2tY2VQyIPZw5HNmJo4SGbtmq8bKFQfB6hmz2C8FaHfaS28hQXFeZQXbnxEwFOCq/BpADlQwJ/JQjBWr1ZZhKFTXlKFtca0YJq0rYgK5io1hixkmjFNW0UVXWDcUKfeYq0ljBGwwcIozEEKrI+XLiiZFqvtMohTPvuoyimRxxabJGQWgLBoNyNkyRA1FpM47h4kUwtgvVw8M4G34y27f0jCVsN5sPD7tVj2rSzpZiue0btxka+94P6W6YwWnocKbXNdNPU0R1Y3P1hmB7JceKdHToV3UvEVPCeuXZSvNPZ8UNg2A5vUrshZ9Sfc3Q5eq65i5Tk+G0bP/LeM6JbgI648gwnjcgiwTROcs64EEmtCXh4sS726CUNAEIqd4Y7e8J5utg5WoPjnTeTYsel5Vu0PTfgkM/zSNgWjSDUzeEfcEhbHaCKiIjI33ElKVRJ0zPYvyKLlzDRYEkNILOzhLe2GUt/V5FiNwzLIMxRx4QFIuDOdoLC9nwN6CJOsQKzAIUiSSPgZUbUPk3DizpoQp27DQfFLyu2FY8FnaUsQtXjb/2px68yMjtbmAPTrp22nnlaFdb45TUOjp6lN4nj+JKZGm0FwrVWJ7On0lKxXOGGccLybkRDMmQvWQHmtRilnw9M5UGw4G1iLqkdzdwp7crrrF1rsy+v2huaCfRRlUJ7jAkD6w8CHBjGXs8G72GgCZEYkP7O2N6yEinNnHdBEdHyz58kp6QlSUv0qDl8OJFc1jq5tMCIRARQYkMN8FYE7CnVZ5fujkMtMQJlRkaU/0lpcpj9BQd7SgRCRhi6NmdP/6ILEI3+EBTbo746sng+71IBOl0U7kbkZEnhJgnrNKcuC4Wq51xK3dwlsUtRWmfVZyQvW+akQZXS13lK3tJmWOl4UmHvTBHFGPaBHlwv4JHuLsbdioOav1G7GfixjlXv4aEFeRkzGlPay1VnHRp7jYAFotMmIsDc1pZUfXDldTVCcbasxzB9mEF97saD758evo
EyZE8cJbgJszVYg4YGpn237gZRg1o74bpQncUlOFu2dG94D53wR0EdRTEUhES3tYyCdp4ivvWik7bMQDdeLYEW3R/HTze3QXP7x8IGXvSqLoiutOLuq+xSQZh2OLoPA74tZ5sfBKdT0PbGtloHKwhsMUUPONUbU3se2x19taqkdVbgff4427ENli2ZvhI7Zi3229b4AgnVt/OsFVP3XLsg1+onQgPZFx+cRWn0kGYcZ9tJLSl5goHgvORFzVu9iYnH0r6Bryitp32vaCPTNbtA8eVO3jS8De8LCHH3UWUmeAp3RUSBL7sbu/feuuTFdLEbqKeMO9v+HjTSR1PVHOfa67NKIMbs73hablsVXl9P3Gjjnq+pYGGhuZuY/f0QbRCH38R6TMpSzgyWvBL9D/GCkkBLxGEgmeLBO8fPI4G7esPfa4YvDt0Dl0eViPUpq5zK/Jm3OgTCXzeLa71OvW5q6uJRq7jW3ffQJBV2ut0l+zu6Fsdbun9s7GCejC/JrQmuS7HX6QmytFaMlzwx6DQW9deIcYttsNBrjdsTmTsfteVcbuwRZi3qJlEvHkuEumfdh583aLjry09O7aNsF3YLYkT6zr25n9jhb/ITdsxwoi3LnwNlMxsd81rfxPd3m8gGIzltjV3/wBQSwMEFAAAAAgAb3RaWqZ1mv9zAwAApAwAACIAAABwZXB0b25pemVyL2V4dHJhY3RfdGF4b25fc2NvcmVzLnB57VdRb9s2EH7XrzioD5MART/AgAcMCTDkoV2xbnsxBJWRzjY7iRRIqrb//Y5HSlYcLwnQhyJohSASydPdd8fvO8qyH7RxMAjVCgv0N7RJsjW6B21BhsVOKrQ4hGl3GqTaTUt3snFhXupp7pMzZHH/R/RTjkoOOLi60X1Pz41w2ky2f4e128VSkiQtbgmRsVg7cdSqto02aLPGfiUnyqFyK7DO5HDzKyPYSOUK2HZauGqVAF1pmvL9TxQtCLj99M/NVnYIbi/cLxYeEBUMRrdjgy08nGge4SMOv78H0e20kW7fF4BHZ0TjaKpjA4YBX0U3ooUtJcEL4iiAqgcG3WgURxXQEiqplTAn6MXAFUPR7IHzgfs7oDSoYM6X3OpGCkc42H+ZsIuPwogeHRrLw5v54uFlJQqe9ddvj3OFvYjZ7lCh4TCUrhmV8piupF3O9eOHd5QXlZACwmj9K4EpvCZbC2siTOlNajLJpq1fblVegKJU7HqT3t+lBaScpn8gKmFa5eyLClMHf/S/7HSzofsmmsB6Tda+dGl1YR2fytboQYksT5br7Gc1h6wCWqdrNfZoZJNFu81kUECrD6oR1q1TplN6iW7ybIm9daBCFt+OsQNZz6azcx+4k9YtQE7crj1f6BV/m5Y9wTLZHovoKAepABk5beMZOhW1ylczAS69em1E49wXIMCj6h6rECfw9sl7UYZRA68VoufiGxKiJ+aLUvygHa447oPckcPtFg2qhoboDh6/20uCMSqOxBA+P+1en4GMWJJXFwNob4FnclCjoCasrPPF09sFaPuG2sQyUa/AZxt7JP/VM2N97biY5OKrUk9bvL7qoNyhq7l6NZHGQxAZy8OLy9NItl5jS2jlv3iyWV69XrJBav/j5xmZLvFvgpOqCl1k9jLNv0a4TYdCcVO+qla6vyxQOPgPgufVyXwfLZKCgHQUrcmTwsOFN626E3gkgjgd6kn+GP+NwY5ZJxUVsRezkmL54C8vez5dLgAtpdcIRSyGAwGjbGlrDDaOYhIu4duFbDnqjMoHmPCQK4NIw27slV3FsvpdKeKzbIufR/SPdUR/5zZ0PuCnBsSf4uR3M7MpLdLyi5Yq26RnyvLWRdKe9zPuY/VN3xeMoKQEUbXZOfij9rX85Cg8+adRGCw+Qaiz5o+6WfytEbzyIE/+A1BLAwQUAAAACAApRXtZsF49jlMLAABTKAAAJQAAAHBlcHRvbml6ZXIvZmFjdG9yX2dyYXBoX2dlbmVyYXRpb24
ucHnNGl2P27jx3b+C0OJ6cut1s3k04AC9TXI9IJcEh0X74BoCbdE2E5lUSWnXTpD/3hmSokiJ3mxyeahxt5Gp+eJ8D+mdkkfSnGsu9oQfa6ka8obrZkb+Ic6TiVsR7bE+E6qJqP0Sax6k+ngyq6dutaaihAX4ry4nkx3SnndkqVL0XLQNr7R9s5VVxbYNl0J3MIIeWdm0dcUmk8m2olqTO3qS4ldF60MuTnPzMF1MCHyyLDP/3hq4B94cyK4VjmAjgb7QjWq3wJrUrG54ya4bpEb2SIW0Gjf9SjSKffr729tffiNHWqMi5hH5ku1IUXDBm6LINat2jj1+dFszlU/n/v20fwWQc8Ou4GVRgU7JkqzWE09zqxhtWIGqKACOFg+M7w+NNjxmJFwKOF6RUsmaKPkAezzQhhzoPSNUEAb7OBMuyD8BhSlQG7UAsCdGlSZSVGf4s2We1la2otEgVshrlfUEsvX8nlYtKyxksLsQY0jAAw0BB6Q56D+3lFdOlBfkBtZFyU49q7V/EuyhsKZbgs/NjeasxxWs3DPUcR7ym5FMs/+2DPacwXPAvKfuHKOgTaP4pm3YcDtz0F91zqNNVfS4KSkaYUHiN/iB5VXPeD0bAXwerRh/+w1ciNPqF9gI2xXPsgW5IddkV0na5JbmViogOB1TTODfAP7TcbfginupzoCUOZVkY9Av8dKAGD1xvbzp1wbe8v0aNhsIXWdGPkcSmzDLvkxnToaECFw0TEFy4RhzgRPZzPIY3JyWpfEvbWI19144N4tfRRWy7FDHzvYt2AMtTidxrhmImaA3kNdjPVHCFPxYJpe2X9NtI5XNW1dEsVoxzTDEheT6TN79QbZ1CRF6gFpR8iMTGtI2EXlNFTxjMdDTk+jkIbqBbeh8+Xw6TSflGdIrTI2ZkXuqON1UzH4PsqeVCi3vC02e2UXIEKvMwGOuMA9v6IZVGoImVsGuI2IR88uMQRtXkISZX4dEXRKHD0UXQuGIIUA2jLSCbZnWVJlCy0kpQVVYZ1kJ9auCR7mDgGYnvuEVb0yqR9Jc1G0zUHuyXH5jHevrVFdGbakCAXQDRdL6lNO9eS5K2tAFGbDFDzpMVwJNlu7h58aZcnxc3qmWTR/3N0+pB9uBLnEZFeJfL6JscuWKbeR8kPpVl/4jaL4zdFY361WfZdZkuexT42KUGku2V4zB/oKt2TUj8+pZ4EVeLZj/NlLplFq6dx59jB8pqIMjfyMZuX3/Ery4E36Z7ToXf4/h1bxtjxumlla+R+gG6WSVoH+aGuWfjOY7eRP7jOgl6HgFYbhoBikAbYXODb4HYdFACGwldIjCZBDn+LrdGH1N0Ek7rKJb1U6ZXFj6umhk8ZGxetA8OkTTM0KAeXSSdy7DoNNya7hbRrcHD+bCpCMWNii3d6lYzKcO5YqAQqxk2KuiZDGJoefnogszLmzQrMTaGkB0rt9vMuBhjIhODUqD3KIYcbkYsaTiey5oZUknJAhcwNsVRREbNSPlIArNsjGQl5SWH+a8YcewecQAG4kcUzLUDQwQTBLYqDQJp1/FmlaJwCKgDRxs3u0Cs2ifN2NrBc+9u/xsHu4OXBOLAw+0n4MsF1/ocLRAL921lVMu34KajyBvZWcVCi+6ugCF8PYOYhH+xQrxFtPUNaQpG7faO9ml2tf7un3C2rLMzOiksyARd46Kn4Z+BL9gIArkQerkl/hYUxAPnNIt/v6G7DjWLt0VmxS5SyUEP71QmOpW2XslG8ZBNKi0Tsh1aNwe3tRA70+GQpx7FeWakX/hlPJKKajG0AbDzMJLYsyHGHPy6lTbLAIZBKJ8QX7SGfkpJBoIG/PHrO9EjBmbpNZlWNiVa0I9DHThI0Ju31+jVDuwSCaclqALgpEiyGxQmKcxse6d7W4ht2nX/R0rjxds9so2GOADoEWcmW3SQ79+IB1DTIwUHJTslPFH6yroqGgdec/Ug+IuY4P733PZ6iCh+D0O8onPEWbtyS1AnAQjNB8
eiAINi4UY9Dqh0/77wITdcxevt3e/Rrv70ELXAgAlAphjC+r3CPO0hum7dLEDietImxlpUZUmTcjjEelgKJWlI29TVlIa36a40ncvqxYrE2mwtbC1Aum5qohgx64axurGbqKQO6PtApXRnz8kQRQ7ghkfgUKXNK91+v320guXwYKzD/yEPZux8dCWw/5tL2GXSrZ7mzc7qiaR2jbmxXOjmz0zeuHQ2tkoMpJAXm3Q1HsmWNdcAAjqeG60qyN+j3SArpMyvIzklvvK9TBr8oI8H3eHXn0DPUT6sB1Uq3qljHrAMWX8eEzj8MCiV+nKv1snUXGnEfZww1FySrOPNojDPBNl7qlOx7sNnKYDB1WP+8aBD3nSl/rpsbN2KH7lMlIUKh1ecr+rPDPFGvpX7GGz+QfJRQ6pOGAzjbtj/2Jsha9IFESm11WngUs8ovz+mkO8YMxwsZN4yiBdurJDXasYlJGDbKsSKWygGznj7GkSGBQAHL4xv1RS1jCmAjeEl/ZokcOIgZ2cOXfin1jAFnGOOM/ug9hCWTnKqqiASaBiIg98YTpN1EU/3ozUdMkIYy9YcZiexqdZ/XgU5NoCs0Hi6MuOTO92doDSy1D2mNPgjCy9JTMJ/fgtDSIGoMYwv1urvGFi3xwe2QcIcfPkvbjanAwmJDbGdI59CTnwe4PvSztWXGz4bRsAXY3tgLFDxUOd8NThNDd9ie9/BpA9zSG5gIibI37+j/jZ2gOodlUkIjyNjkyO0Cezwg0HwQHZY83IrcXSXaXvz0vtTcWm5VVjRm03cvjbDk/iDyOt7k4jtngpQPG0aAYzi4S5BGwfrW6AFmNisG6qGyRwbI78YlJmy6gIUJek5Nsm9m0tFXTfqLoRvFVIN+HNYBA+L93hL64tzN+oOvRPfktP559C+TMiBPp7uhBppD8jhrHV1wQAzgbunmFy+A7OAeNhfMScRlaO80jKCDFEWkMxzHDPs6F4GIkQ7BXMCl1CdCmAVvWBGueHv4rt2wrm8E+sjAMTcf1pkz05hPr12rWedLuVykwFUFFL1tibTBs5OIi6aopntRxCeM/vmfAjPH6+uQf+QWeYroMd93LdkRSQHxw9jG2L42VHPzxTzMYNzhXOS9dgDC5sK3H7/qW9DLan4dif8Ma28l7TIIC5eRzz96ftKEI9x/OVfPV81skFRQv6Irw4M0Yed1cevwgM/5202MmeSYYCUdvbPAupjDFLfm+uWXo86EHQUAL+T7edORLXBsO0y7OA203EbcxuvHIV+L31JMXRrcGZock03Rw5mCs3vP0Hy+IhQG2bnwSxSu6hazxSjBaYdu8lL0krSqZ2lXy4bIMVqGmxtgqo5QMLLlPsq9lAx1PyV5KjRTB2n2jckInVe1rB3yfDLLLlkzS/PbDtx2cg0CVxF6koMlg3IVYasn990+8bIju/Hu0r4ZhpkS5SGuv5caqWBmRsvOPTLA9+FAL5Tx2N4/UGSLjypfgNKRn/K7z/XTT3k+UIeT0pvCD/BijpudlmWOxlMe/668SLM3ZSmvR9Pn5WWV3RLTvICtSAt5vBxcuz/tYFv95k68SgkN4ZqzT7wfuZfausF0S9UNHcfetfoMpodxsCOcp10Xiqjb+eOYEvGDYjItA2aWbPEoeN/Ix8dpIt4t1/mQ1+jpElehKbcR0h8+WR/sP+cMkl6aj3MKd/2F3Yt/8XPYY9Cx+7yQVdJu1p9JJ80yv984UfzRhNDG1gfhJj3nz5MiYMBkLj+NkOdrw/uktuAxz+ZMXc0cLUCmMkkCznL+Hra/w2mZLrF/Ft0qLDlsJfDgY/apsOX88v/jDsggSWgvO/jkN4nTUGmD9+qR9IY3Fdlx/fkoUE+4uc6eR/UEsDBBQAAAAIAERQW1rq+9xs1wEAAPwDAAAsAAAAcGVwdG9uaXplci9mZXRjaF9wZXB0aWRlc19hbmRfZmlsdGVyX3RheGEucHl1kk2OnDAQhfc+RWkiRSARDtBSskk0q6y
iyaqFkIOLptRgM3bRE26fsvkJ0+mwMn5l1/eeq/VuAJ5HshegYXSe4Rs1XMB3CqxUG+VysjTiyHXjhkHWjWbnt+qfi/b1ICllsIUWuenqqJHBUGtr6pZ6Rl+z/q0zBfJt6im1Owf2VZEEKXHWDXP9OqGfTyDKInhtr4ffR2SnR0yFyuHTl+Qttln8nclyVZ3STR/gmXxgeEPwyJ7whqD7PpJo0CG4hjSjgTfiDrhDuNANbZLfWUnu4PNDsvKCnPS6dX6PJtsWuVpJXjq0RSQZ9BUhTB6BHSzppd4LlDXgbD/DFXGUbRdE6DSDlvoDsRzdz7gWxDN6FKdkTcSSil/z2jjWTQF9qf6+wvIENZkgrmJiGTEOObRxBmQl99w9VxnGnjh7Kp7yarP045hpRjY35LFhaDrqjZcghSx230xG2NG7m+QS+XayjUuvwyRMATn7b9oSa4PWaMshhZ5m785XkaYq3+OXuRknxtQzpRWElNzOmOBS0qFzUy98aWY0WWGNrxKLnKcLWd1DL4OWTm6TEoPb3rxI23nM8N0AlTHZkOXLbP4zX+f1rxL78f4sZrDc9PGYzmpJ4CZ/10Ep9QdQSwMEFAAAAAgASmt6WstzhGClAAAAxAEAABgAAABwZXB0b25pemVyL25jYmlfcmFua3MucHltkcsKwjAQRff9itK1v+BCXYngQpciZdpOazAvJo3Qv3dEUjPB7M6ZO5eBnA/7Y3vZnU/XelvfqppfMzgDyjabLxGCNglC7J7KTpz4GY/kH4uOq5LEK6XgjV5DCMkI4HzBHHc0ICUjgOOClR0JigQXjGCUXpKSxBVSzKQ6zKaCJ7QxP1ayx15haCdy0ZfyE/7js67SzJR9xAtI4QzrdHRkoKnu1RtQSwMEFAAAAAgAeVlhWecdQOf1AQAAnwQAABUAAABwZXB0b25pemVyL3BhcnNlcnMucHmNU8GK2zAQvfsrhuwhNvUa9mpIodDrwsJue0lDUO1xLdaWVM2oIZT+eyVZjhOzXSowxpp5b968GXdWj8BnI9UPkKPRluGzbLiEF2cGzLIWOzDCEh4NGpYtHpl+5f45NloxKq6B2BZw/3FC7AN6769K6AYt+FDCciMVHw51Bv5sNpv4fgrcBAJenr8GpqDjJLmHRg9uVATbVHcLQrWwpUZb3JZw6tEipBhBIxTopnEWRjew9EKA5YhUZbFK7VsQvs+V7BLSF+guSeikh0YBbxaeZVUTrUV2VtVT65FMSBVa4JOG1vcttRJWIk1Nh3MfRB8jWw2PwtDcBBD+dKgar1xDjEMeLSyqFdjQ6NtwiukdBu6DIDf1Fq2xIZSUz/ZfaVkPDnbw+88l6brm7UCnvJh4B09W+xoEKJoeBqnQp0Qpwdtkdgn0Kk3cuBDpUbRoI77TdgEts6rIDJJDgPJi/1AfFjdlFwFV2ByTF7Db+dbqoOTZ1wAcDZ9jBl0g4QRiqRxml9tkYZmc3yXaUDjffONNccmcE6JLefwqFp47+GJawbg4u+zB+braFNynusHFKdszfCJyfnejO4PnIk6x5GXcUeq1G1r4juAI238IWKb2lgpv3rw5nnk15Ru/bmOL6A87eLgk4kD4f7CAionT/7PYUa4g2V9QSwMEFAAAAAgAb3RaWvh4Z9iWAwAAYAgAABoAAABwZXB0b25pemVyL3Bsb3RfcmVzdWx0cy5weY1VUY+jNhB+z68YsQ8xEofS5La6i8TTnVrtS3tq1b5ECBkwwbfGtmyzSVr1v9/YQEKS7WqjCBzPN9+Mv5lxeKeVcaCprKkF/Op6wYc92Xf65LeknrY66rRQTvAy1Se/Ch7CLRaNUR2kveSaaVdUqutwXVGnDIy+fw22LzPTYrGoWQOeqPA2Jfk/zBSG2V44S7jUvSsaLtgWrDMJqN7d7GCOJXqopnD0SLfApYMM1o/xdgH4iaIovP9gtAbXMhipQTVA4ds5JJheQjgChS9//v3BRwBSM6kcq6E8Bd9LOkDNvu+
YdDGgbnAw3DEoqYGqpcbZENIwjcEQw+Xeu9tLcKd88N9+DWHSq0SptQy1uoRKhTowQ+KUydoeuGtJlFb2JYoTiJ48CkJCtlW9wEzZcID0im0m22t0Wu4D3e8Bds+HmSJfIHzAM6CSmAD01h9s6Jtg47VF6XWdekiBkFn9sFC0YzbbRU9fIwxlK2WYX7iTZlEeBwKsYDGQ4BMTrXb43o0QyDJE06OSUX6DHldpbZSWlMSLuT3wbM8h8yFFpwrsHGZ4RUbcbgIkUKuDrKh1WdQIRV10m93EbLGrixcqembJ6H2G0qJqWfU8Qz99xaiCWzcl+ADOUGkFxd5BkOfGxrAV9z3T8GqQLEBfHavstYmanZ4WnqCoeeVH4jWKdM9cANmiUSZMENnhBJFjDLgBR+xDOOuDlcvz+Jrdy7G7jhUIcCvOAweuRpZRkqF6Qa6ZmGf5J206+szC1OFY+fshbDd8nwASZv7OSW1feosl8WRMLR7I4kAXXGIwSzarBH56HOxI5APSY4qrloS9MKhU7hkRTJLLsXYfbi6WPI6Ts8eQ/D3mgqiUUCaLHtafNo+fN9FgGEuDCfg0T45Xz5a8N3p85ytoyYR90y3BEkjnBcnWH5O7rIL1wPi+xV4vlaijcxQfoNDU0M4S7E2bLU/LBA4tr9ps2dHvyuBPTetsUtcX5Ch4R4iXPJ5v+jxJ9M2okpZccHeCafzP2W0e30rm6DMIBx80kzqlg24+VrpOYJWu4/+BjzLtVgHlHx/942f/+OTdV/lVIhehNZfM3z9KR3mge+GWl4KRX6iw7A5nfOrvQpbKOdW9CypY8zYn9nIxKOz7G0/SuWz57zZJN81/y5v6zxReeoWXU0ssS0Gr56GiNV7r2Xo1NqsfKuc9MMgJ/0em+yXMH31haCezv5dL2SuhLEP0D1BLAwQUAAAACAB7bF5Zf5w/CzwWAABuWwAAFAAAAHBlcHRvbml6ZXIvcHFkaWN0LnB57Txrk9u4kd/1K3Dyh0g6DedRrlSdLuPE2bWTqex6ffZs8mFqSoJESGKGImiCnBlly//9uhsPAiSlkfy6va3MB1skgEa/u9EA2O/33xaJLJJyy/6nEpVg3yeLMpEZL7ZskH+I4WkY9Xov2dttuZZZsmBJFotHEbPcjvuA46gP9j5JkzvB1oLnuoGV0nZN/gUNXK35PBVMzv8pFqViD+sEnvJC3idxkq1YwbNYbnp8sRBKMXhgVR7zksZYOEJF7CpTeVIAHvMtK9eCzWY454fZjKkSRvEiZmkyL4CO3kbGVSrGONVizR44TApQSpHh2L+J+yRjP/3uO5llsqAJebXaiKzUsK+TDXsrSlEQMr13fLuR0OevoiwBXVEA4ddr0eAGSxRLNnkqDByYkrN5Qlwl1sglG9yJ7bged8/TSgx7Qo9RFltV5bksSjXp9U7YT4PzIVOCF9CwBGSR7qQUG/aQlGu2TlZroUoHUo9I5YplQ1aIjYQpcOIjRiWZEgWqA47jLBMPNBJkHccJqUmaAhE801rBNjxXTHBAD4hDyScgYUPS7xRL5YITNOA4YoGs6CHHgV13Ii9B1DgIxC2QZbYLNm94luRVCi1xxF4iOwuhqrQEDpKSMp4q2WtzK5XyriJ+82yryQahAna7uGO7Be2kgqidXgegxwkvFfci7fW+l4sKSdVE8pL9YV2W+eT0VOMYFYLHQFQsFyqSxer0BSjPZCHzbQFCKCdssBiyi7Pzi5OLs4vniOcb8S9esJfzWGRZBao2SZOFAJlM2I9X12NQBcF+uPru1Zv3r0gfNrIQLBYlT1IFoPv9fm9ZyA1byDQVZNYq4vMFqiZwif1YkVn9yPMcaNNdZS4KXgIs02cFHE5L3VZusZ9tGfQY/L3MtmP68R2oAkLTT+hF9K8rMJ36PT0BeP30Q6JML4ODfnjDNyK+rnI76Kdcq5p+8hqut7n36+/cgP05g+7j3rDXm07vwXDhaTpll6x/Hj2PzvrwFnClNzd9LZn
+mPWzlBcrsAT6rTbQBR9uQarQ4SrLK1DlSw37xqB7g9QjC27Hjs4bwq9uub3tXRsVvbRoDvrXbt65rLL40uIx7KE7/pvYvs6gu+XpzY2eA/+FDmIhYqHaPSwqcylTwLu3SDm40FebvNwOAOSropDFcEIsesb+sd4ylYtFwlP0y7b9j9SQyRJsolLCOHeC8UczEt2dYQD67VKCejHxmJM7t3DAfYF/zaXuRuoDXkIgHPQSkYMFtp0KGFgpCB8KjaxK1JrNRfkgAAInT4L4GJ8Bg7WTbkHLgVpH9RsZi0GtSIZsgDVBJtEDedz6EY1ZP/V6z9p/vWfg83HudIU2v94o9mxHP2RQBlMr6221yyfLQhtEUky0YgOVLNHvncbygRzRUhToA2EYQNpI63wRTAY0KeAMcnQhNznH8FfKMcRMaMZwwDPXUfcDGHpe6IkuSaEvrVSFfpvFMhPEVvFYgs7cKTZIIhGxeTXHaIsYnVb5GGBAHCjBdw0j4AG4mCuGqoEzqSS7O1UPECMzCZ4PeF9W0AHlDcOM9wOWqecR+DxgcbYQpcwiEVenF8/zD6cRceuBbwEoZQ5EO0BFopMSYwvOj34byCsR/VJK1gf23W/7DBz0w4l1wxAb17JKY8KK0Cccy0Lo4J8iS5HmAFKKvrdvhy5TCW2gv0RUBarV68ViyaYIU7s8/EPpTch/3aCmgdHlUlFEnJDv0+aYZCW2GIOdsNp0xzA3AIAOYMRnvSE7eQEqmwlrnf33MN1JKU+AgJO5LEu5wZ/ZCaLVr5WQDV6nchuP2fl//f750BqVyGIkDrQprhZaUFm1mYOKAFO15iQKFZGM1AR5UCrDVQMGma4wihETZT5mIlpFLK4K7Mu1VoOFR+y90Dr+oSMKISzAB9gDlKYiG2BHi+lfwGVa8KS12hKpL7yiJ1LmS4J/Ay23Zuh7bTeAx2LNE7KRBYg41lZCneh5qqFdsBHB/U92Tm067ax7/MEgOXEifsa+W0tp9ZziQaEHRK6PRLszM9Sw7Bz4lyy9TnYScmDo06xq1IrlK9iNA3mLpgOGTm8dPP3WDR1OAig+9W5IL2z1eVtP5rHgR3mPySIyGg2H8pwosAKSiSPfydDIkUzipm6LwANjbx+VZ5DmPJYmh/LG+jxt4N0tUwhqkDcWiCboBmW3nPXv+YJnYOCp4MuIva3AuqtCaxVyRZAwIAQhmWhdBlSVlUmK7wrMZ8HvYiqLCTDknWQsecoXQvPC54OvxZr6LrqnOBOZQu05ak9B78hFDJ37wf673U+L5y03VPfockf+eHJL9RvfUY3bnsqZMHGUAj1yirjzpP1iCIPFjvbVmuMPACTB+K5BoIsqhDcBAI88C0b5vyAcaxIIqNGRAf53wmAl8OKFZ5amT4CbG3brW68zURKjNkNvNL1pWJ6vDl7XoI/TDh9Wh3HUtlDjF5q5BLZlVQ19Dgp7Z5iM2k40goO0+ltL5ji11YqIY5LldvC5AbChR1lHcHgjAV/KcGgVh5EI115ox9rJaz3AkIMcAhILgem+iAcFh6X5IGOnp+xi6ElHR/G9Zjf0CC1EzpPiS0Z9a15tO3pHc5GiW6B60b2RcbJMhA5sNm/+s07R0C0X2npikUNwoTVqplNbSj+11pCPI9cJTzZb3m8l+xxtsvSHv2An5zSBMxWnWH7Y8uyraTQHeEOtFCJNlkHQdjpTAzs6JjsYv4Kw7DucPXN1MfET9BuSt6+h3H5KGzsFv+Z32o+DjYIVc2VcE2kOufrELfIwZuOSReWyjNgPFJwNGMrtXX5+RJDGyaalnIJR02zNkEQY6ZcRMmZoVZ0aYLWAmtEAMgm4Z30pDmgnQfi2y8N7juYAC4hFGrpoDxuCTr0KAcuwrImtL/lKrb+k6HGmCYm76dnanp04DCiAu6KgOtwdd7KDkqXM12kq1YlvQ5sJzC3Md0fXw4UdyuoLW2qbJGunWG3R+CIl1gRoqdd28WfWKXVlQvjOuKuzW2ttZ7f
jmhmB2I3Ez1xLS9Nr1h7i6s6GTUtwtSFdaxuE5U+Deb/ff8k2uoX2M04blX2qHVCd221lAF7K2+YwtXyswCJEKlaHQMZmbQ0LEzYHDxjHuEcA7DVLYVfR0IUP3U3XrBLxgKUcCPqCQMW4E4IQDAN4MU9K3PUwU2z4FqcANhQJZEfxWNe64QfBw0nX/J4cM2Qf9QaLLnnrLRCgL7LM0TRNmyqo3+5Sw55xXbCOmSZZUk6nYWRVIl2Ogze44zNxhd+bugSLOoIGGHanup7r7Uqo3Z1NljihOin0eM1T1ejSZTXox8yyyNNyYMl3EHxww0Jn16G2xG43zWgD/r3lBd/QlpJ7deL+av0HFrAJqhqVULEkb0vpTBpSA6SJQTTMVGq4Nz2O58BoQCuD4EfFNdCPei2Phj8BVTNV/O4pLItrMnHYsspoGqrdQeqtICXf2BwU5F9bRlNoirL3uiIVsT9vUU847ezoTICg4Kq+gsweKAKBlOmW8cYayUNI11dwCIZsh5HwCjdGB5hWgppadmNmn7DZjBRjNrsNubyEluuigoYxG9GeQTFqTL9K7oWxvxHtr3k9ogDa93o2TC/cfHUPq4adgglQtds2UVo2EX7tpIOeK0a92ySZUBa6UQfwUbj+wCWDTwvVZ0PTNChrqem9ItVEYcw2gmeotyNTOhvtUYK9/KICsSUfXDMkY9ZjofZsAnCzmYtQCOBcb7BeUJBDSc9mpBsmKlhBYojDkTBgNguX8lzpHdKiZguV2XX3C5AW+wlUqUhQTLOZ0SsUooPzRpaiYenuCWmzDJ0LoCvR7CRZkVRGI5DWaHTSiiBYqbdbuQ4eFb3w7cjuYI20CSFQZHNmOIwNPosT5eava206PNlVNYYC2jFablGwjthLw0TAHHrUO/mcLUBYctNUZ1Q45zLuE262763gABIoxYMs0IRXtJ9aW8TLFNQ34yVQghvPdhtCPOZpsoB1wAT86noyM/utwDncY/FIarTzR2zXWQG2yFh5gsNwZ3+Dhti4ESiIt2BjlxghwmaPbEwVV2XQKtImOPwreAJk/R2lRltpg/5fwI3NYbHIQpZrN2e4zGoGRv1hz0ccfXSiKBKikKwt4QGExvoRA3E0hfdLDHc2AexGtYXmsj+DEbNatmSrmN+4Kf+brYCUX6DbxwaODilLRSdmHivtzyMxdEw6AE3bN8BVb+/RyYUQN8qRL9nNbbMvHY1odHYlnkv2y8eAERT2zWqzXmkEo80SB3saxP4EVgfu1/hWTLScG8QR5P7qTCbIXt76kbzDQiPfCozfDIWxCwVSI39+k5Xtnv7pObVqApuCx5pbpAWwAN/MY84eJ+zRTz1h4VVA6ukwUmUR4PJOz8SxgY4iQX+h3HkOswtrfIePIm5YrlAp8exA9E+ZZIObZf8Xu2j5OGH6N3nij31KemxZtlae22GTZoChm6fkoqZT+IUby9Ppx8EvetKPQ5OS/4m6aCfmaCYHOFikkMji0YMbc+wAFmEj8K2Ktrvh9+juwT0SZ0y/7jTXnGagUAMT1JFJiQ3PymQBgGz0qYOPiTqeex2h0/cc9WAIrh8mxzRab+0/PdNuYCZv9mDSe4brD33mITbHEmyKvRuWTbwPAJa5LF1XX5NiD46W8XvhEvUo9UvNyxBmh6WAuAe0wCUZ1+Id1svjy7Qc7lMbjItfVW34Y6famCM4h2oNoblfazon2g2sQ2vw/bFaQ7C6tGYHsEO1huB2aU0L7lfRmpXVmuDAC72gpSZ7+faKPevuMZ1ueHEniok5+Ynlb/oxsPs906n4QKeywsJMpN+7Tpno7oTvTSdaVLS64FvTwawqW130e9NJL0hafei16bISZbsDvDTNi1Twot2BXpsuOpa3++j3tuYtSrtIaHWs2/YYNGoZkh+WXboM3OrhpD645x2Uahi9LobssnwXTqkuoo3SVjP02r+p+/rcE1OgtnrSrkQAFXUwuDNoDSmY3ukNTmNvnvL64T8VmR/9k6w
T3fqgjJa/2ZHoiPoGHyxu10E8mBH3ZnmSKTPt2J0/c+vRLgz8ValJ3Z9EgvpljeQyKLsBc3zq7UFMOlnYKTbDUFzfy3uhdyZJbGEeVHtmzHVh1eaYV5izvqouSv4H+1nRWk/mCApopDPCdECNZvHLUUS2c94EOer0YN3pVJg4bxNhzgZRXdvjDJgrSrpbRvB/F3NK/8S3drASSwi4/vGX/u9wBYL139nMHozUcvUONFo+dlEW5L5UQA8lTFXzW+220FEUej47l0+m2kGmb96N7Q2DzUulklXmF7H1fCC6NsVXjg0k+BSPPG9DUsf6gEgILHEV525OYHdwfl7aPzD231gM0AKAmtxYc8qDjoTSoXtq1qWhOue21ta0ovbZDrP+qqUybgwZN1ZIes+jnqq9VHV7cscD9WQci3S3Krfk+o42AsjNHaKyxhUdqLlmgOm9j6N6lW7noqKEa7eb1MdwpcNCPCbhaXsCN7Ex6+n4pdawmJEPNNaUStuOmPbynYbaBRuSc+mpp5dMhXgPQ0BBhSCkKCISWv1N+cFzFZPbph+hrjUvgk1F/Gvvw1gNAsg2gWts1NgadFenThf61+28SGKc3JTcPOX7B2atxn8Aq0RBOwnBPkaOR7285Dh0OA1Ho/e66hsvXu0XfJBHyGhEPmE0inaCbin+uC4jGy5oQ9EVULvD5gnpARVdq3toXg0OyKrsYkJja6nJiHc1rXiatoNeTIvEtkVjTRGSSWuoJ0mrCfKI1KTNZnRbwN/H8O3kGczlL/mMiDxMEXtClA1cbIgTteCQAcRtf62syZn1RbM8uysjIIz1jMe7maHbHbd/dDQprrdz9iBV80rfzeg3uN8fNiB3Vok16jb3r9lLoPYy2Og6JeN2CRrWeg+LhQ3utVD8TK8dwBtGYUw/gt37AkxH0Tjka2sx21r4Nu5Pdi+BG6PQ+5ZG4ca7fOhTCShK6MnrfM3UzDNn5cxZH+1ougC8b9DhB2q31nRqT1j+Hlts57lnt1FLJY8R95OWdZjkjaBI9z5PWm6V8JuV12ca6JeVGHL4AIE1LuvtEt5B8jKZs+MqBcyQmqMZbZYuAaO7WGAXtuYo1q9HFPnnGE8rM/iN2tGn5B+/KhF/EWvrkvb/teF9gmT+X5knHRx0sjuiJhTH3vcAumTRWTzorAftLSAcVzxY9ulsQcdUtGbyWPT160lfoqSjK02dEgLeT2HePTL6WW8tdNcq8fr0o75vfZwIv2n9p0NKhu5uOZnG3ZIyHbpk9clVvdoR6oPahxjUPk94RfdxPfNqOsXWCWFPeK8+VAnMhXd+sLTurvZ60JYSq1n6PBctDrHdwA047+CP2bwq2ZKrUhTRLv2gK3owh9YTB+g3ZfKdx/Hx7xitecJvtE6BdAYPp3Tmbgei3ukjnqoB68sw3nrS9wxUVLRbK+YuzVN+woKi++caeiyFdhwEmI4yO31x4D4pRBgKj9cZM/AgvaFbHo0iLGoAuipv1wVUo5AgS3S6bgcmVJD6PoOevnkXqJ3uB9dRG++tZzPAfP3wr2TUuqIeeD61ll1ry7lRF/h5sUdXXuM9KPxSA0gWIYWRha5vU17+IDu3Tp9UHIhzWEJHcYYK0y39VsHbF1h36bzhac5teDo0MJ03q48Xx0K4CNTqnK6uXPh3iHAW70IPPl/cBvpxrmV74V1gwxH2t9e7bvZi4IUefu7ryXnUDocaljfQzusPvGgM3E+GpvOCXp8H6wc6m3DQ3rTbslK2Ru2OEySY36qFvQVrd6KdhhY8uwuP7RXbUF76SjnuvrfrrXoDmfSpvh2If+JxgZ+uoqS8K+lur4U/i1R3reLbEasX8J9IMRHQQXDzM0lPLAadR/lmVFMa9ylE2yvyYTRuroW7QjFewOBsXsg7oYsCYRWDPnWSrSgYc7wIu8LTOfaymk7tF5yuyaDfpQ8BjNlWVniS30uDqPjhtrZ0nLZ3z8mrUmKoP9ZGqSF+LYfSTA/dVta2d+Hq8+T
YdXR7NXv44oLks2eBYXEzdzI/f585/CRU75m7ftP6XJS54Wk/OzbI9Hc37BGpifsm2lO32+qbi17FMjMXoZSSi4Qu8rmrIcEhS6WVyUxq9A2vaqv6rWeH6jjgBK3DaCO772hnwHs2S/FgP2KVWb/H05R2Lt3ZPbzVpVERduO4cZ2ucZUuY/qDC1ba18HHkI6hYwct1GbpcFJzI/yvYuoDj9R099R1u8+9alezDP/wiJSStBTce8eudWGOIGi9CthLv1PK5pfms42Zd6rPl+0B7CVo9T2pega6mOSIxlKXv741+98vXrxgN6gweNF4qS9KblBU9E2qqLYwZ1z6EOdgOPavCtyc3w7N5Q3r3xI8cInn0waNgebO+gdMafRtYjzocae/6OmdscZeQQxC9Kb0rRL9iZLGNRdA15bkRpl4LAdJaeYy8ec9LNKv7Nk6/5s3SrUn08EuKVtz+JWKT5sHzyhckgIMEJ5J4QwMaIzMTaVBcA0bGqzbszcEvp3fC+8kfGnH14BuPR//Ao4PLdPj4td1fF10dNPyb8f3CY6vyd6v6/lqI/u2rm/1b9fX5fr+F1BLAwQUAAAACABsck5ZZHmhUMMAAAA/AQAAHwAAAHBlcHRvbml6ZXIvcHJvZ3Jlc3NfbGlzdGVuZXIucHllkMFqwzAQRO/6isUnB/wFhhzaUHrpobSBHMUmXjkCWVqkNan/vpIiEkr3pJl5YkcyMSxAfl3ALhyiwFs+K6UuDlOCzxjmSCl9C3Ffkt2oIM8Rf4J/8ei2ZBPsoZNiaGxON1TqRHa+SmaxILeqdCax5YdIKPQeka8FuFSp56Ib8bX6V3KWTC7COKPY4AsaV6/PNdD8TLp/vT9sEvIU760nMsAt0StPedvUJ3JmeNgjGBdQBsj3WMvGNP75hPb+MpwXqV9QSwMEFAAAAAgAfElhWdBX67pEAQAAQQIAAB0AAABwZXB0b25pemVyL3JlcXVlc3RfbWFuYWdlci5weU1RwU7DMAy99yusceiQRj8AqRISFy6IifWG0JQ13hqU2iF2KeXrSdetq5VL7Pfs52fXBo4KEb87FJXMTX8ZJMuy2hsReJ9qr4bMCeNjBimeRI26ukVt2J4zFo8QMB45tvvAovtLy3UX/QaCGTwbez+xx0hlKOe5xWXIOt++7ap8A4lVnplfwlRe6TM7RAwmop1aFJfv+ga4g6pxAg0aixEI0QoowwETvuWfxBRHNUKbpMIhci8YBSwDsYLxnnvozgxBBafQGupSetiAjn3TY/LDYty4iRsV9Q0S4C/WnTo6JTgmX4Iyub+kpHfawHZg6ywWM90dR8eL4I2OBkJZQo6t1NEFRcpvrk1O+3n/YlpQPlbPTAmqDx7ppM3qcyFtO53lLOWlqrYweny1fsbJ8hw7FHFMC0MjahcJpBAku77Ov8/+AVBLAwQUAAAACAArcHVZX2ZmMgEDAABwCgAAHQAAAHBlcHRvbml6ZXIvdGF4YV9jbHVzdGVyaW5nLnB5nVbNjpswEL7zFL4FpJRuop4ibS+t2j7A3hCyvPEksRZsZDsN0arv3jGGYJOEjRatlPV45ptvfkHUjdKWSLAnpd9awgyRbSK8tGGSowD/Gp4kCYcd2YOlDTRWcDD4j6aWtUqme82aAxVyg9b5b3fINgnBx+uqGigXW0ueyfu/Tr5TmkjFgQhJBuPcCUzKmWWo+KKP0IO4R+w6/WJVFosts7BX+rwoyfMzWXQUFgTJepWncjRzjwSxP7wqbRB19DUI094mi2xi3vmxQVaQvg/4pJDF5su30sfhgrjglf88kgZ71HIC1Gdxq+rmaFEEFrYWOB21jKhFxbSw5zQ27XOB97RmVosWf1qMqCg7OSaBreLjejw6ml2aVo7rJLg3OJs0SHUHlbOmAclTbzUmJ/ZPtTqNXiJP6wc8XZhG3ta3SgHGRWfATtJSeII3ywdmPWOzntiYA9PA0aACmV6c5kJa0AbLJNQoXmexrdXnOKo+Vc6
9h/1K0hB3CgDtFm/uYTwlyUQ4qcGQP7zJkju1CnQm1r3J2Hp0p1kN6Ljh+U8cx1/uODFcYhdXx1q6qeo6ZokF59AOx2gKptDDHFRHY/0SYfSVGZwEJcMRuISCedvXtJvecccsw5YVnJ7cDFqziWgvw3QMJOxBgzmoim/IrlLMJvfW1Z19F9DJ4twJcAn51IAnyVUoPpnjOa/Utoglf/AH9AtmMBdGyLTAhk3brBvE1s1gSC3vSpRbVQlj06zMSu/UHana9aDU4PZH0q4sVwwCfwPMZePQoaBO7vdCdzeID8C4i+luCLhbe5PTQVQwwytaV26vPc8oI+xF/Zri9KZjOUzLLGgwa/HeuxdfPOD4TguLUxirU1e9fgtmZSxZoYR8v93GV4sjDHNuu/Y8RuqPpDx87uvnGmr1Fy5uoxcMvUUwlKF+2DvAH2nOj8cjKnJWfuhi/ZkRHAG7Rllt/IdCLO77JM5E+QAlv5e3SuJ3UOjrWnM5H1r/ApzTKRY//G0N9Su+B90n1zXrJNz1c3DJf1BLAwQUAAAACAAOaXpaa/ffu0QGAAA0GAAAGwAAAHBlcHRvbml6ZXIvdGF4b25fbWFuYWdlci5wee0Y227bNvTdX0GoD7VRV0kL7MVAhqVJigZrHSNxsIthCIxE21wlUiMpJ16Xf985pC6UpTQd2mEBNr/EPjz3+wnPcqkMSeV6zcV6wN1PxX4vmDZ6MFgpmRGzy+GRlI/vuTZjcspj4171TldPa2Y0/4PJVUkYloyijAq6ZqrCu3TgDw46GMQp1ZrM6Z0UJWwyIPC5np7Pzmbz6PryPTkiwcaYXE8ODmjOw0LwnOUmLNZMmPCGBS2C+fHPF9OLD79EZ9PT2cX5dI7kSHewfX1gUI7MduFvWopgYAk7BNGb4/nJu+jq/NczoH11eOjwpidvzqPL4+mPVwBdWBB+gkRmlItg3EAUo2nmAz6CDwHNB+nipheaM5VvdmnRAnchQN4HBGrrUB/aAQBtDwxIpUqY8qEdAJB2YFysFO3BBIYrmvF054O7EGDZBRrFIa5trA4Mwl/sG9aF5SzmTEdrJYu87wGJHnjb490HNWov9luqODO0hbWSKqMuS5cul56RK0MNj0lM4w0jRhJtpGJQfWAl27KEpFwwqAZt0csfkcM+Ip/uHZsftOWSMbORiYUkbIWlGCVMx0wkVBgdgfgIEp8ODVX4ht8ntpYXXJjlmPjIioqPE9BGjcjL7xukSWNNENTfL5kplNCEgobaELkiNE2JLTJyfqqJ2VBDKNhFPRmIJgXDPwasWfMtE0hDyZALC/L0BOp1kUGdj8JBLXZGFQWbmdI16GX9qUEPWFu/4+e4VrxRGtxFbjc83lhjPN8QNIbYLFhB6LAfEHQXGVIBAZO3DJymN7JIE3LjxbKW2OvntkJzMD+jdzwrMsfbupBB3Ct/+QqVwjZ0y0jpO6tV3eaAX6HHHUOQaUssxghSB9IsQSvNhmsrfkzAGQljUMtji6QYdnGWhL35UKgUstNv5qHfx1/0P3UacM0P9PYzGXhrZoYjrJ9rjXOJIgDLh24lT0hS5CmPqYGyqXk8Iz8xIhgYBmiQENYZ8YaniYK8s8ayauph3UHE0jE5np7imw2qg2l0JjXPNSQHvNyy5+COwmphOYC3IEOUojtPNORS4ylEiXhyt++iZrCEXCTsbrifJ6MHM0g/zGtRiZssfWdcgYeMV2Wu8qAyJLmhBtqLxkR79V1NguXAMbuA35oND8fgDuG3ktG4rcLn5ulo0ko7KxFM8Lgt+IS/+GJ+nmnOvBkkKOapF1SS010qadLCLGHYS1twm9Fc5IUJJk69cffdiwJgzVXBPo/kQgWoHViL7L5jDExQGB3WmHfz+YzMLq7mlVktXMV0LoXG2dBesMLc8YhyCbtYSTqEOh1XLhjtSz3ZsPgj4auWD28pdJsijpnWqyJtUQBmJT7EeVToKJYJqHJEXh8eTjqeSaihoGdNg6vYcE8Lp8n
ZHYzX2LRal10ubT/khmVV16uYdZjY7C0RUXBXHfzs9ZmwyAGXDZEwhMQctiI+JovlaNTiA92BdTmXm3XIFGxWw1XwlvLUtaFqNNSN2jkFlXUl8cn+uQ/tmlBogg6dkE99fr4PRn59n0ixZcpVOPZGKGbffTcUYov90g0+HFzKDnHwUtOoHAQxhnuuAVmPbB7V4vLY2mH3Czwl8Ne4hpM/yRS2g6W3cXyrqeJ56ZJlEtzfzAuXV7bwrVNWPIX9ArwooahoCht9snPLWtNH/DXFTSa/K9ZolUMAJwFzh61wVf2K+osIbRYnIQ3ZQRjLPLcaeArc0cjIqqzxLHFrDCaS+4Z0npq8WnSQL7y1PNfaMltD4y3DpKwNcZchqHM8O69k0YplV82HJkhL+W8yRVocv2qSfNV8YNi3ysnw9Dr8N+rX+2hf1mnLHMJUrbur3XNfkACWlWBk+VjIfno2q82yl7VNbOABvJH1IqgAQT8+9Ep3fG167q5eioerZVHJWoL0PhZPakKkKYvxPz26SI1Xze66RBG4IjcrMdZVq5j7WkvbuKpVOL8s99t3y3c2Byyep2Y9gsor+LGpI+Ag/LsjB46vR+/ajOb2H2DOSeWNiAEq2yiIrT3WdG57kN0wuC9yJbc8cTG1V9WqELHhUvxL52wslUuRxB4utRG673D9Z4+8mqEsDI7dvvH4/7ViP18+jZ7euPlPHhQupZsR18yi0bKcTz4cKzB4oidF2YmdRX8BUEsDBBQAAAAIAJJzZFrjjnWjfQgAAIAiAAAiAAAAcGVwdG9uaXplci91bmlwZXB0X2NvbW11bmljYXRvci5wee1ZbW8bNxL+7l9BKB8qITolNVAcIMAHuLaCCpc6Olu5N0FQqV1K4mWX3JJc22qb/34zw+UuVysluUtzTXDVB1viDofzPg9nN0bnzO0LqbZM5oU2jl3LxA3ZS2nd2dkGH49UspYrw9UbG2hurr6drm4vb/58V5EY8WMprFvlXPGtMIHu1i9/71fPzpKMW8uudJ6XSibcSa0mj4ko8Eu//jYYnzH4FEAbtrxWsoCHzU5tPFGv16P/V0RmhC20snKdCbbRBn47I8U9apdyxxkJW/EandHG+U6wVGx4mTmUOhO5UI4kY1Ixt5OWeRHgS2lFytZ7WBVstnc7oLkXxiKt3vhVYKyV/EmYIVuXjiVcsbVgGsiMTFOBTOnYhFvB4CHPnDAKzrsXneMtCGalEWkl6piXcKYZs5kUsIv9Fc5OdnyzEZmrjUFfXt9MZ5PZfPX69iW7YL2dc4UdP3vGCzkqK+3LLRw0WoteawP+OX8xfTmf3E6uV5Ob69mr6c0ceeDmZ/fnz3DzueOPvL1xfvn3Vzevvv/H0T1ArpXO9wfS4Z/p9eRu9e3l/Oq71d30nxPYdf78+XPvmUOWbbqvgYzonrA7tFgCNk3ABU4zC+EhWGHA87q02T7EAXgvk0pALFraWf1Y+Y0X7Oe3niPEA9sKt0I1VxBHK1RagjP6VmSbIQs/x5QmC+vMcsD+8CdKHfzl02chlVsufZjGoYqfv5QCBKrjGiIhRSFLoyyEREaxhKfDFw5PQRsIQZ1I7kCHB+l2RLGFqFGghXUYf0GqEXsBoS9Ap7BUH0sRDYGmihJyHMIPYsDsw7IuHayzFLSA+OP4AILfCDx0yB52Ejlq0MqikSsRMVBTe0Q8aetjKzGqMKZQLrjh+YEhIyWYxcqhEjAR5rE/Gw0TnUP2sTtdZinm2I9k0jQ+xHBpxal6M2ZTn7PBB5ezKRxrIFWrOgJqkS5oJ2M0uBVEkRtmdS5APygqWw3sH4xW2/pQ/NQOUsI9aPOmJRN5eUyxUlk55wUV4NhlvlTVzqqcrMDuaAUwlHS2Y4zptR0dDbfSZBDeR4roKK4UT99Jcbw0NJpV0XNB8dMfNA8gP4tMOlImOBy0AlXW3EHiWVSHY7pC3kFGpsxCAa23o/8lhih0oK3oPx+yTKh+YDQYvlfogxIzGLd
8RTKA1IHhQo7l0/+Q5fKsxfIJmxlRYNKiylVrhHa2zzRPW5TVGpae1jr5jxzfG3sJh93nic4LniDF3JTiCAGkNuiUVASt5287EgsDhvYR9918PmOzV3fzIHuLtuqxWC/b7X1UeB6rQgMSqLb2IfSGQc/B4alXO5G8wZSKDfXALbNlArlvN2XW2gGU4fiRhaJf2lWiIVkuqG2MOyagpn/R7PmXBZwx6JBhiFX+X9EWiDb832WIH23kViqeUXtoIod2jqBt9HvUHodsseweRQwoUxbxvkWv+tVbLoFl64gWD5FZ0RWLCt0pXLXp3ZGpGJnK1x+PY+LKh1b/+Zhx3/Yit/ndlQbtZgm5kwiVcmgP1DNR9KpfOm5COx03rXHI4i2IL8fQuQ110proePu8Da2ybn7UNKkbQRFsumZ0QoBovmmS78DNP0Sy/dBtUC3JL+vTmpPa3Sk+DiVgthCJ3AA2QcSMFewN62Ovz/SDAE3r7nWQYRVaGR2K0zUYwtecP8q8zD17Up06SaVvLFN13o4j2vRNnwQLAG3UkuPyQKPIqr5Oc+cBMh0MhkiFgBpAJFAA4QLw5bTjY74lfXMhXBQ3pGpiJOBvyT9ds+1g35o7hFmcanCShZIzwGr62qIROC4gQOP3WqYsLaH5AnOAvFFH/hsaA2GDxtyl2E12MksNqMhdXI4RM0MsQhG/vLnGZxS6fg2g5hxs9BUYHU31IL4Cz5ckBXGgKyM3hu+joyFjGrshyUqmj6BGc6ccSZWKx/5hsDfF9PCJbW1fBKbj5XEQ4pM6lIA2Dvn6m/cAj6ginMAe77q3nAAfEdOT+ONdbH8bABK54TQI6fgKSDtrv2OTGpt06J6wyaMzAPRatZiuB1TmoRTloZgHZkcRTiA8jWwOasuoLIBW9HGjBzaxxwnftAHObwxOwGdaQb/wWY5VEBI6Ntqag0exMvpi31y6wTaHEAcp+gcGGbQRT5glfBjcaSYE8KuZELBf2I1W4tSg4LbCAqRSdaAvqeTTuNE3jauKhsLoe4CUaSwQ6TWidhfzgibOVdNjaC2B6yaXim6mjxB/2Z6d/5EmBhKHDBO6rWorm1mZqHeaumWDuQX0X2gZsYxBZixSqrG9wiCg/gN/sDJQj0f7YHopXdH70ZjHIGgFAiGedbz5o+Bcx9627phrEUO0LwXfRGMdslDlXtuMgSigQJ5946b2FGJIowowAg0icALRstD/Eg5FSX8rcg0J0gAdL7Wfm2CObyQOWUENjTPXzAie7v2wsGmGcYp4SBU3+pospPzRMUdoujwOrGiEp7Rje6hKVa6QBJEAj3zldOhNcMDCe4EC3M/ZVEtMGQIX+cIzLD6j1kizhX9eCMQatQK1ZzEOqzN4YNUV7xQYagn9awKiFuNfAxR9FNQR2II/20HKJxyLvB80hBIAAVsDBbolPWU9QOG9gX8TgyvAp4Hpy6PcKKJhG7BDboteWOgdp4ee76f+O9Gd8h/d0U2TRTgDRy/Htn4GuCbLBCBAIC0zFyWvf3OBBsYrXHNla82NTlWQtkqhMnhrLKkKHlqKnEvPuzOh+s1KCyMpnv+XAAnfqrxv+hMm594eoWnhywlfGOHw2jhNLabxx1rAVbdBSNqDiE2pqEt+9CSogUB0G67FsdEbiy8bRATbR/OS2O6f8E3EO9DAB72E+P3+X38+vCl+fl3v//KKXr07aK7kdX8cLKueGa9jJvY+q0t6+w3CvwFQSwMEFAAAAAgAGXRaWudtKer6AwAAxgkAACwAAABwZXB0b25pemVyL3VuaXBlcHRfZ2V0X3RheG9ub215X2Zyb21fcG91dC5weY1W227jNhB911cMvEAhYV05m0cDWSBNHKyB1HFj56VBIDAyZbMrkypJJTWC/HtneJGlTTatH2yLnMuZMzeJfaO0hVptt0Juk6TSag/20OADCH93KUo7hmth8PtcHoJMrvnfLTe22DPJtlxH6Vt//Ls/DbKW/aPkj5JrOoxyyd1
ivpwt18Xd7TWcwWhnbWOmkwlrRN5K0fDG5u2WS5s/8lEnTF+nV/Pr9ex2dlnMFpfLm/liTfqTfcMmpHVaidpyzTf5X0bJUTLQnV/OVsVv5+uLb8Vq/ucMFU9PTk6SZMMrwDD0oQi+CyY3hbdUYDAspUOx4WbqiLk3Vj+MgW6CVDgX0j5k8OtX/4TsPUwTwE+ra3TWj/kzfByUU4uxFNE9Grl/SNzdJ7hQ8okjsVaB4RaEBKU3yDc+86oSpUD66gM0XFdK74MM2jO8tEJJqBk+gJLOXC8WdIKyae8k8y4bjfrp6A9iigrmzpMFSFaASqekl+f5KCg9MlvuHPKay47HDCYT+CA1McZVUwuMcMehowAxqM6qquALZtCRpbDUiAXN5JanJ+Ohw/FH7jKfpmOQ1Wihnn1N9CN9FnbnfcNLKv4jhgyT/OUVVGsJ5kuETIfETnTozZ11Ad6Lqfj8gdmQf8/PUvOGae4YCh0KDTvUim2OEflndPHSndFnFD2Oph7EeHht9QHvS7xd65Z3d68D/6G4yP+39XoJy5vVOiLp5DQ3jZKGI4ThvMhDcRaNwtES1FLslnFEnfW9Xex4+R1ENQj3mRkwbVlyY6q27qRRKrrNjWW2NUWpNgjBNf10EOqGWYbYOnkaHWnPc+RakRPgDPMVuCOdtnbN5zF5CwNNKswgXjhPKEy/+Ra77JiEMbZ2NsRFH6UFjmpWuzl0LJPiaIEuvHbyRvsTXAnsTsI26H0syGjYNazr4V7HvzHUjaIAo8YBl9KYGODL4Bd4OzeGgG55U7PSl6wz5pqKnqIPd/xGsx/4vQ/6AYEMgL3n77xpeKBgrzaiEuggZs+lA8dJyB1lkgL7efQxWzlzVtM+qGNL89rwYSbDws251krjdLlioqZAFbq1WvAnPsRENRPmjPt5zWHlihioiKfw8l5tv8aZizZbLd+iTvyqqzha7FadX9ZCUh8yKo406dE92HhxUyip9ofCTccp4I2/wLn7vfeIIVMZcHeUvLcVcbzjTmobRz8xhLsHO5mKgWFfcyiZhGctLAfHm4mparR6Qmxu6/Ckz+8jM6LExViJbUqXku35WURCG+GJ12dReL64ugmMDddf/1WFeqxAHkpMN5PWFMiTfyegbZ8iuH3mdw/+c3NgwE9uaIOlo/EowzcGoijrp+j/vXWMf2iqfwFQSwMEFAAAAAgAqm56WjYA3OxqDAAALSYAABkAAABwZXB0b25pemVyL3dlaWdodF90YXhhLnB5vVpbk9u2FX7Xr0CVhyVThfE6ve5UnXHjOnUbOx6v0zxoNApEQivEvJUgV6u0/e89FwAEKGm9eaknY0skcO6X7xxl1zWV6I+tru+Ertqm68W32vQL8VLn8PeHoS3VbGbf1EPVHoU0om7do1bWBTyA/9piNtshtWyodavafpM3VQWfc9k3nSP+Pb/7Onhlb9X5Vm86WX807uzbr//yevP+xdt/3M5ms0LtxJ3qN6WulbxTG9lvTKtyvdOqoGvJTNg/vXzY6MLckCYrXffrRfhO0vEbYfpufG7JwiVUHC8t/HXxH/G2qdV6PUvFF38eqd7Q7fl8Tv++V/3Q1Ub0e4Vcmlq8fimaHX33kgpkLXRNTy1PsQPzyLIk2eCSEXf6XtVoU9ndDZWqe5PNiMcrOKkeZAVOWbhTIzPZM7O+k8ChVPeqFOAeMSf2ysyRJJ64l+XAbPHbj94mP3qGbJh+r43YDXXea+Bw0CBjR1rGSjq+zMUydtR13Q69P2v1uGllJ6vYUWQrewzEbNDbkZmaXTa57Dz5Ho0KUhz2Ot+LYzOIgwS3AY1We1lldHv09wtRaFJQdkdRyZZSIRIECOjO38jEh70ySuSyFluFBuk0aFwICmNkZmNcvHj3WiR10/sg2x5jk6bWGmzUc2YITTvGD9pWSVB1alwXj/ThM/GNtaCuC/XgYhGzisn0e7SZuurwRK86ZXrgoGvUEKTkW/ChU/8adAevwBbqAaIr76M
AMF51yzdvuk7BobaUuZpEOwuJ/MH1D2IZZHlGHBPvWGseG3Ir54AVHFivHIU1R5p8ID4cUGtbLw5K3+17rg9FU20MZU6CztGF2iAjm+9YC8bEXi8EeAM+Ub6fOzBJfSJfHoVRJepdQ1nU3WgW0XbNPXAsRMg6iDuKKZHvZZ2r0bv2MJodAo1pq4JTs1CtqgsjGjaurJqhdrEj2bMS/CqNaXIte3YeBZ+lyiyrpuPkWLCPmoNyaQtiVBLlI355AxFiOQRUFlRg4qtWDTi5VZhLTvA492MnnE1Cp7+B+FNAErXldAzzc885cryK1T3ofh9lPHj0Q2QpS/4Lshg7jIiZfTOURWjyX+DIOKFfiFodLp0VzfYnjBY0LRRs1LipyyPxqUfeJNlJan8ty3woQVMb5EZspYHTNh5KVd/1e5fyxFh2nTyeBBeRcySW0NgzOpesrsWXSIWyMRXaBpYqofI9cyknMedC7TLqLCZJ16kT9C2GUal/9oJyZ2l6WW74CbM1Q5XYEykdQZbRqaV4duNLKbQ4kOSfyO6vXdd0yfwFdCe2qBF7ea/Ez6prLNMFVmuoxIIrQDa34tVOuGIz2sB9+jLi7/R5pzrMDF9bmCQ6DyOugXLcKap72EZFYhpgInSvKkMJ6d3KmQepAi7LFatsTYlClFBqokKVfVRHMGxqxXilH7gzKAxP22u5zEHjrFVHoMsM4Glb5qmfgmxdUwy53pbYusxQWod0oAD5gWlkUD4lvNzA8+T6+Ve/Yb5svmIDhVrnJCe8z/J9A99G/IVh41RJR4hlwM7LStcJcMkxCTd1U6OTklM3pFCBg6vWpMtXEuJvfNwuT2/SS+te8Ab2xyUlXeJD8r1t2U4dFzb0Gm0ZqKmor0z0HsPQvXClaul9uIqIrP0Nlmk1ubgeb5K3T95HrZBp2CbnTUAXRy88oct9EhWfQ/I3ZzH8EzvlG9k6qBvWb8yMHYREIRKAqBjpqYMNjHIvlVDbDqDCU3PqoPloqN2yH1V6ctshSaD3mEuNR1JWTnDq5U479cQl9PrBgTKm2kVgNm52Y2vCHgktVbwZQCL4LhHU62KC7q5GIczQ4lAF4gEKtR6MJDrv6++4R5GaLZc+Qx6sVX9ouo9iPM4zQr+fYuBJT0R1IbrViekPOES6/ioNg9uF2A7U3wPaVVMwGKa6SASZuE9lRK0Ojto0VW74YczM507aqi8N4eBRyJ46XQnTEWCcrexzmADQzQCKh1xNEBjCZYDSNjaoFTGCqySDOWsby/KNkmbAXqArxn7QCSuaQTCww7jn5krXwAP8bAlBiqXNla5Hu/JYt9z9bGhBO8Vdnt46lA2Uz0VEFszgZgMcuepQu3JEU19pg3SXI3ij8BxHmi1DnsGozquReHxp8cdEI2qooULWGCvOFWsJ+MRDgUQ0Ec4PTb3GGEFIQKO9p4PnPOr5c4g4pjV1FZRuUh8d8fiGwgJtl/oLL1Jq+7qt7yEbW+Vt7tEjbnUQIslsKtRjhR7ObQyMZn7JQad2ZSP74ERrqg115+jUuESpYMRiTtqtCZ7cJexWwem/9IsJbh+0bFq1RfYSEu4V1CRwf/ht0kt+YFwIk4rqOlsNIhQMQ3tyncKoxIAoP9KwkjxPscz0qoF8Q0zyWIMIJnI5nS4iHJ1NaHhL39K/v+xu6IN3t28Ef346jdFHb+SDroYKN3dbKF1+PgRIWuflUPjpfKdrWYq7TrbQduAT1FhV/l/7w0lfHFtZ1A3fwbWmBr914E5ZHo0O+6KVAVvjrX8og32H/lRLnHSrQ0P1f4cBGAA/iCs5vghnOFT+FEBwv+YaqKEvB6OHDS47R9tpB8wGbcpVx0BotzvK/JPnF0WhAIkZEZBYsAgGHI2xZEcklEDC9XKosG4W5DNcC+8CFBJIAl4F7USiM5VN4A/NsVBXnSFShGZQbLMogdsOSkgyv+0lOcGN/SM+1IXbv002YFmGI9y0+OEAE4Ph8G1UeM8F81mCT1ghLcT
1M/hjmx4rtZt/b1CXfwezEPfG/wZB4UAOq+Mm+6a+Vx2ASvH32+/eekDDAPQdr9t9SbR3OgVVBoYFCdn1DvNI9ZAekBiDYYew+Y4WLGMOsllr3vkxru1OIFm4SvWmDX3npvtgXWMokHLWgnBMkEGj45wHOGiXUaFPTqdf2/BhLuQANcvVlTPk1UJc4aErt3Xg2hsgGGKymrsL83UGwZaMZToNrmU1CzSnrxYZQkV+Gr2xdKf+oqeIXzzUfKMATeJ7loGsNpZpPHrJTmBbCNhx0ltFGCW6sIhesYLxM88I/wQjITQOs7xehOP0Z9zMKHCCpgrSSrtDxFKwsGf3zQGjAFAdLsflSU3E8NxCk1CqDtbzn2EZcCUA899vj00vgmW+q1NbmX8cd/R0B9tF/FtEMu5HLgBPt1Z5T8W3bg4L8Tf4qLoPBB6hVmrI+yPVx/bo+qhdlEMKtfCXpq6nuGZB12lA3wrrbnbqyNV8JD9fTyMrg0pYHoOFiqy2BTSx5nCDf63mPV1bWCfFPnqp72lpjNntwwlQhGFHOTdY5afAYJw5UOsJ4lgI9dBCWa97fgY0vjpXyQsUwY0wkRC3jwnxOPMLpWM15xJ9asUV59s6A3GCZMGKrEpaz6Xi88/FV7wT5ScYYo84ah2aupLmI44cEcHlUlz/YoLcSal2BEOdXj3jHzf0J4isUJL1+bIaxxW4rwRgl4TXIWvvoLYrXlQtP3SDGvMdoOPQBrjE+GyH9owVy+APteBGuyQ9EwzhOPsIEXukGolddHjZ3G0Cp9dtBk+unyWX4uLXgNbc5LHR4ao3ts5d1wzt9hhZJ43Z0Zo6zaCKwqDHP1eNxtJY1WhJ4AcMqAWqpg5MewBa+hbQnPNe4gbWtUxXMw/QBoqfcKUTQEEwiE8omzA4trjlvp9lwuIbJx4d0fX5bRVBcuuNM1aCLkcwMbT45ITPrdBU9HCNsnwpEjDl65c/OIJO4g2qMV9TFr6dbmDMBPaRfE2lc4vlqdJPfkgHVQnyhr+6K/MFNwHbT/ymaHu0HN0ogqm2beL5hOCIx+aoRsFqjOhrT8GyiXMYOS1P8hqYYWKvvlH9tyzyi/7WaYg/Yid6gVGHH9Mx9b8nMlA98d34ywqFKzdjGJwRwyoBFafDvgcWYLkofd18AcFrG2W4LJ/kK5HFWDtLlWgZ3Q8UOyOPs+THDLZGIssE+Tf9fzbcq0upOAlFf5vTcvwa5+d4CurMxi7DtsflCT1oqCaHfAXt+aeG1NZ7NriXRdsgQBXC0L6o6mrOPnT16gAngh9Mzl8JS3ymYZxILoUaoGIs2wtBQoc9qu+ON4+yAXnOco8w4rkTWYCPfgVN7w+/++Pz578fkSR9Ug85JNEoQQt1Z/aEPlV0TZswpncgxypk93OfkCgVfxK/DX8t5L1aDI3PqsVigxXj1eKmbzZuY8Ld+dM+Wz27cYuYdXqJmlvAXnTtbKpEErkm7nmPgASOoAn3dL34pKP9gXT2P1BLAwQUAAAACACXQWRajnwqCVgZAABBcQAALwAAAHBlcHRvbml6ZXIvemVyb19sb29rYWhlYWRfYmVsaWVmX3Byb3BhZ2F0aW9uLnB57T1rjxvJcd/3V3S4SMy5o3i7Z8sIFt4LbMkXCDnrhLMQAxkQxCzZuxzvcGZuHlrxBP33VFW/X0NyT07yIfwgcWe6q+td1dUPXrJy31Z8z+uhGMqmZs09u+NVye9Z2zVt8SCf1qxgLW+HcstfwIuBlzV76Ip2d3HJ1v+wz8V91+wZr8c9otl0A/szfBdP+0OvHj7woS9/4c39hXi13DT1h6YaEfX10HGuGn4l398Xm6Hp1kTA+oHXvBNkes3an7flZlBPxV/i1XBoy/pBvXkNzxfsj/VhwX4oe/j6fgSeLtibAQE33cXFxaYq+p79F++aH5rmsdjxYvuuax463vfYBVG4uWDw2QLnCa9+PbbbYuDbOT3HT8+r+4X+azN2HYhNUHHDynow74ZmKCr
xpqdX9Ca70Q1ms5n+/qqoKr5lTztes2HHY/IvqoemK4cdyKFn/VB0gJdQipo/sX68o6GWFxrmu6Ir9hzo7/WjF/ozQYJ+hZ+/7crNTkOH14Te066pOBMiFKw6C+v7plse4ZP6vAew1IIV+2asBzQOhU3P5jD8ptnvx7ocSt5ngEUxsKLjgAPvgaw0wsuoHFpQkQutBPvi4xrglNsRMDyiCnbbG3ZfNYWjCxWoYb3h8s2pqkCyXQBfBxCjMwTrNw2QuSt6eAst74E5W3Z3OCqHsxUkQlggIdTACHYkDQ/F+VNZVfBmxKesap6AMGgnxARS+1A28KqpeTbNPXv4fVmXe3BQ3vAd/3ksO6FvBB59Eu/A2Ww46tFRlR0axBF6AVgOgJyBQe/Awwz8FEUqB+neTvYoukfUqxh41utpfeJA+WEo9/w42Sixe2Bpv4N+0GQDHhJdLQiFASf3yFtFvUWaZKlRHOhWPKCMm/bZbsnlQyB52cxgQbJ+IqfFP/INxR8i6O9jP2ist7778RgaaljxkTTMOCGLblJzUmvQlpZ3gMIeOHfH74URnOIYm7a1scLPWwjxAjaCMO03YCwwkOzDeNEB8I6V945+I81F1UGQOwjzKza7EpRgO6WvMka+64AHk4FyPvk2Ox5HQ823pR6Lp0ZYEzG1Rczn97Pvlfpuiqq863Re9ckZ4TM9soF+nmWnuv84BZNhQJAwHQoUBW/2oC0gLq162rmBV/pkD/N5yV5Zggd1AvMS8cNyr5/0wJ+XNpFTrmlaSCkXZQR1zE0F0rJdhmXTn4IhP7NvlODMIIIwqcOvgBSwmMMc01U5psye2S27or9lPnDLrunPIGW9Zd/Si6H4CFjcst9q6H8RiPYCrjKkV/QOUo0tukvwu4ogMEWMzuQFemGA34/1RrgP4acE0Qcj5ojPeAL7Z7+A5WHbR0a2pxGgL5dM4CciLyDSk/PQaNiOiw2ElfX319cMvAam2PBn0UFGpdVkvQYZDev1nPSBbaQFwdMb9ur998TIf8cnlnix6RI19QPawo8tQa1yysxz1AdUitUKGPsWQovbDVQD3R2gcCOzfmgmvsw/fV4Am0Dhe377vhutTIG6bqTkb7QOQFf1Nbcw1y1XJjoJB79u7td1swWG3dqkLkGV7nin384z0/GS/aVoe8zY4A0DLauH8h798hxY2rd8g3/pDO3Ve8EsiKlgzgWDBPbnkSM/+AO3kmMiCCGuYbhNV7bSmnC6k/dDJ1gIWH76bGHyk2AOuI6W5kgy1yEuFnfgVSLwy+16aOxRbmgihYMg/HyVoPTNa0FDOfRM8TMqDxIlgVSyOAluwSroIxS3h0SzfNjdNWOHZMl2vUeNaqKGo3+Upjnj/QfnMODQFZtHxSMhYfyrLdDlkMkIlLxhtCrIhnI0KY/8ijKRNU5AwOs+8LmnWVlIeM34FpwfBm42oJHg2FKdenT70KLcumhgD5CdUgnPtr6IeuAQirqI7earizRLlTfriRvbsSF+EkSEsAXfRzaRE+X0YJVrCYoHqsdvwFWCv0KGrFwMRSDQY9lypxgbwTOlwujlSuN9wXONHjtki7X05nKwuh2Wb1//seuKA3xf0rC//12ocknTkYlselwVA88Z1xr4TQ3JL0pbxwKUCIesMKLc+/6BPLvhY2ocj0Cr9xomhb8SQtU8nAFBgyD7utGlH6m0VBsyfhP+FD0d/05eHbKgQgYWDROZNZeOckEjZGjaWBfDUfhcGLUJfoLrf1b8Zf2uGastazYgRwj5EDCrA4h/w50eELt5N1CP/GoF/1MFIx4FLpye8Ta5hISESuwTvQL/vwTvAPY2lxCysJ9x7KqtjrLU6XqVz1RImK1W2YXHnB9rYIGVdDFMuiB/Kj5wNJHAE2MqBNw5IJfKhxo8BxgPeK69O28qhdP0hme3t2zmZ3gzV16GI75zzyWPFB8J+Ftq9eP9O9Fmtro4FQ+ReKZG9zyMIwgE9ka8/xO9nq1kKXVZoGG
4YoIWaSRkNjzDOX2iTSxZOhPpVuA1z+Por69mqwVLvLtGrfEp6nmIwSWoQYnhYYBkj2biNcY4nCaPPehJ0ev6j6tr24ZsDDNllZ6fSx6krv38d5mTCv6Ni1ona8ZBZO2uf+9NoVJkihsxr0Mk3QjUTwYB0MYodhuYSbjJ6dvmSY6JzKHUluyMSgOyxsPBsz1hMU9YVsfbqthw6kGpkU6sTE4k89myC5Oxox5zyvF6jtRL7BT385TTW9HoNY7njCIgdL12aqvsjBi5ULIpf+FSqg2IA7KmbbnndU9O2UFZxDEMTxiwcObWz68yPzOseD33KMxLUHxBRXlKCunF3f/hESFO/wNHdLV4AfrYG+VExaTc0VYANcsi8fRtU2+j0wWZO1NtQtcElOqS/a6F2EHX5DfUUF+rKJu1dNdTXQ0HhlL2GqqsabZy/Z0cerKzauQFIJgjryWN0Fd9+1pWOxxBqmlEPnfwXdjjZysDJQ5BG+YUlEgiYUa3Gi5c1tHoFkXHMJiA5CUi8qXSyzVNOpRS+DprgV0tqaVLa+anADNZKvqB1w/DbobKg6qSiKLCoJxR8ih+lIjI4IPwcm8cL1/zBkAf8eUHicfmZ9Elc4ar5csFg3++JDUKNAD2U1KC4XaKqIAjb6UEtoaHqinJd3vmkeFWarzTGZYajbhy9oin8tJjnPItWDGdnJtTnLBqIyn7WonAMF8b55Hp9mTpk6FI/CXzIIIkQ7ECYagFUnSVkwrS6z3fr0dUc4pYQb16pkuaM3CmMJmc680Pc7vkmWXwegaxaOD9LLOByMrmGjsBtjo2mlcZ+4rNv4V/DGgcOssyHxnTh83B92UGK2+gJDYIxdRMEzSpBh5RzpzbCUo2XcFLIs8nDQCbR2qKFBIcqYYqnCPjxGiOz3cDhIMWHtZHUIwUVB08I+CjnDWzbBtD89TD6pUWVELGVIdVmNhgoqML87VSf/AW436eIyJ1ZtJsz44th+30DcCFGg54RJJFj8p8FeG6VfjVnLYAxPXWn+k7auC/jOptiEhYGtb4hBCjaKmMyMZGPfOQEFhQPRaRcawogaApGiu8DOyYwZC/9VEhPJJoxIb0xptWOcKICsaWxiWjRXY0WmQefDsi+SPadFGV0VNLD4LLiHwVEB/UpxUbAkATDBFzexUz0kbo1QIsbLw3tkC9Vy5FJmlaER9CfL5i/+rT7A/2DQsK2IoLweBRLpxewhb4YYSvKE9V7NI828uUAHlWr3wGitQgUBiCV/bDtAtUnW11Ec8MPF0VCBAFPmr+mvGOaJcE9w2z5v/6DyqhK0ZLREL+Yu6De2PGga+bETMgYjBkMV1Z3FVcrvpS3IL3YjVfxlGx/J+xF9+xRIH+xou89QlJthpK5deyp63MQhkUricsiNiJtYEva2VJuMu2aecJxL2cQijwTYoPCgXPEAwuRtmpbDtEiHTSbtXRRmubYiZopc1L6OZOVDo+jF1tE4JYFPVhbj+6jVccDQ0ZTQNDXsPweRRfp8BDtUe0KNBbWvLEHYFYNx0B8w4Y+YSWhlvZWFV0D5ztx2oo26rciC0gaTGuyYXJKWAv/Bl8hXHmQdssW4JP3hUtd/fCoEWHjRdylwh+LH9l25GYldCg63Eoqx7HhTDV7amY6I4isbK4jhYLT9HrJOhasOJj2d9eZTFMpDIh4VU19/DyylXe2zyg4hamzbhjhr/47ZUZQiqP13rauYj0+f9dyxmu5TwP4k0UtB2iENU0Idin769g7op6W0Go5ft2OLB/MbGXCuBNTauY3FuwjlNrLQlJA8yvl1cLBv/4yzzAD1fBp5yqBTC0zrgln2bNwo4S5tF7Fp2wZpJgu5R+ynGmC3zRds12HpCr7dkdPkOJgGGZsZD27VZ5srcAXKydZYtcegyVNakta/nNAti9SL+9xmWR7NKvJYvoIHrZbJCoXlvThaAOCMoY5TYuwbDv2Ldh1TD0c8KHSkKD9pLNz9GDSX1ADBdexVxLI3gaPrn
E+TskqE9FV2PqKmPb/MorPRICEBO0nUeiQNDBFsPRGGNB/zrO3nCAZOToI6EjhVUeohkPIupjdM0K1L7aEeSbb1da/U6pRf8qnxQFEEsrzlW+0xwRfiJK8zyPhB87rnxluaITEwyDk//kFF8xlRn0681AscjKDUQmYJdim2EN/9zRHOmsTVhNtV3/iu4gyI5vod8jsG7XNFhGmYxNtHwysa1khUtymYPe2WNQdJ5bUOTmDn9X4sploEW8aeEsgyayE52huUv6sVRrlQWxIJmY/JOVmIj0MLRhV3TKgP0EbJUnMI8sJYUKEYFK/uZcyJrJ9h4eJ9fET9xXKQlOdsXPENuTg59Qi4Sy6FwkGQptL1J8aMrtWs+/5iFUcFWns//ESMo/boD+OF2i7hHicQ4acS14Hsd+DbdOUKupKADGFEk3ZcgmtPjPYwGhO0rcIqIiGR4KCEBiGSAhC9tu8nKF2xbBjEObEu+iQLwtIBgP3b6RSGzNdP0pC01KxGEWc6JWUMps0cKoxQDRt8D95bU+EqjOVuuoikepNjtEzTsBiBsEXpn503sKWREue8S4QRyJl0PSXgU1vONYtSOJ5FykN8envRKuPes9vqquOuWxQcTu2aWazw+NqRcagsIxAHxkzXl6pCOZKcBJJKPnDZMnPSIhbe8R8j+JfJaky1uUpg7JRyUYq+FB7zNFBx0CKrehzAKFnZ8ssvgIX05Wcfi4uM/+IJgd4XoYTS0LIznozOd5gviidrT1q0cTLf/va5ZDSEI7gsy/rPGw8XCguapeDLPrgrIQ6FUJqS5IafBkFTBZEJ8qAQIN16myXixQW1oPzb/1jEXF+KnudkinNSoAk2EklX9ce0Yjh8mvVzB5020MmJZWTt09jRRjsV2QkZfKoEIVwvY5QDOmZr/FYb7GA5R+KRizpuIjTazvelWfgv+25QdQVcJ2QURk9mrxJYNst2s+lnsM3vospHvw8YU4+BgokreaKjXI3uW50IuQ3hld+9RiZOOnkictJecSohG53i+lNhhbu0rTu8f8rWOZIytv21Vsr5QFKpxied1BkdQ4iS16Hvfy+L5QBdDeU5mtVnmMBStSQFcDF+4mu+npo1neD+hzoQB1Bp0T6VNShLwUNdhXjFC95H6qQ0Svfp36eHu/dINj7Psi3DvKPC1QRMvR6nDbroteFqYCPqkOcFnLwKJRND9IZ2cnKfAIwWWsrR0iK5R9EiYZoegzuVExCQCtUPZX1iepjXZJzvMulQb24p4VeUYe8SvYQ/mBy3OjwkTrYi/EgF8ydf/NtsRzCrjLTTfC6CmbaVXHaz4qLnfCqC7yXgCp9paqCI+q3Yp0sDu+eYR8coPbnnouCktR+zgWpDWHg33W1pJabP7rl5csP6nXr4OjyuqDUp/urytUMu1KTURTu2pXeYIHZoPA1FaMkNyIx/GWsGx+nUpdcBsCXhZgGquziba8Q/Knq7sRgeLHAkmLW3a751ASqyR+WeHIpexni0abuCxbbN0tTngCi2JHEJMc06yaDThAvEYCTO+uaSpA+XvwhPaRlKh5EmnDnFYXH90D3OIqnVfvY/fJDOB73LzVICC0xdoNnToVo1Nl0Qzy2WCWo8NoLAdy+64SwX/asUWP+licyqZml5eq1ORKySbAHsDa+zoVvSNTZJ8V1n7X9HT6ixBOF/nwQWdASu/F/ngMJKBF8Xs/lHhNzIc43VolBjldsPuYQTHlR316XFPKT6p1oH9PuYdHWkLkchr3Ch73bSsvM6ISAV0KOH1nIHUmJpjD5amVpPgNXT+NIkgjtS/MBOfINW6q902LF2r5BOtrg8wxauvGKvTbRS/vl6hfWPdG+WADXpknpJD2lVOkOD6Ac9m50D2Y6iEv2CrwngQcU2gvfKdrIIFxkGLh4rojxtOYpy/OUTqjp5wwp72ysu9Ldl92eK/ktmEvmXBxTzvecfbGtn/lts003Cd/Gblw6WphxOfOAh9NVRwavYxlGF4oSJ4
+OvNoTPJc0fnHayatK8aPR4cflgxUlERWmw3cxT3ejvgS1aCnKxXY/NY+qksqIlt/Y52X+cAxave2L7JOKak98QuXzYlq1VfYOhO80B3lpSa+lxeb3519jxB+5dqD8LASN0GNPGrcHswKhbgmEa9PKKrNWFHKoLFPTSzNDUq6qX+Rku76CI1f2hhiXuBdldNsxY1SIkcQZ9f9Hdp0yNxfPEH81/IhsrqMrQDZW6dtZsFoFQf8/mB0hJINx3q/sxyXF60hb8EWnrN98ad3eM/E4CmrG++s07SEXTQmZkEcsnyKP/9NjeBaXzRQZ25VIQ0pNORnJAWUHwY3eLz/8fWPN+pqE7qCAWcFd1yaJEG01eHf2HzsQSuktyCx4TZ3f++RJsZn3bGacYxPQTqmEihbDSfPrZpF2khNONjmGdPwlce4UpiQuCRC4IPXAvkXVCz05QuhZSkrNPmkvBhSDhtcivLciV88E00m5KmiZIw19t6GcN1BDVVaWfP5+00SMpWL7mXs/Kt65cBJnG7+ouWMGJ6nz0zPms4GY8bkYmwonXf4VfcI4LhnWoRm7s2Nk2PqUmwKduipugP5pqFhDQQ8SN2qvVrt97JNCrdD04Dvrg/a3pbsPWShv8Gs7yB2GNCNuuwlNNYpjDeouhO243hHecQkH9k/Q96C68rRfU7PzpTw8+ity1xCy+6hrEVORIkFXQ0J4q3EFgeZyps4HV7hstbngCOGaB0mDSrOE1sQFcjjtTn8qD1pdq9kRQ4/EXfwjA3+uRdHFXt8UF4GrO+Nivuk5H7UdFHZ6xFtGNkmStvG6Q5B3KBENoD7k46zxt7gGj8s8+U2twZ9mwdQR136XCsFPv0sjfpI5NNHl0BEzzllM8Hx9EEN4bJTxNkW+x4y2B6ze51w06HgO8zCxXVRJcwRulJwpec/jzgRF/mwLKwM1i3Z6iwm3TCpr3k1dtaMA7hXc0DzeEHDvVrS9Rf6OGfEU7hsCc8gWWhYlzwFR+Y1PxER/8IscybJgnZxcXHpXAo97LpmfNjpZRW8CFteiom5l/mtAbmS8oO8jdT+LYH+gmqyEipfQ8e17mgUEh05bv7dDGv71y/0taj2Nbor95cFojcp4yddyzq7jkU1LHFX4nHB21cp4hWxK/cm5HfiFnRU1QPEdry4tL7nHRdRR3Bc3Hcc1mkWUnLWTcQH9knZyU3bAL4Y9mlHXXFXVhD/hQ7KklOazxX9NAgEuqEoAb7Fb9bc/R3mQVg/kPNZg7DMq/XN7vZYMeHgw2fV3ZI1N951+BMGuvJ2x4cnrME7qwROKa4riUm1Dfd/oxR3vAynS3Aw0FgN/Rqlfp7vcRKCoL+8wRjaXkTtYundUX+1oKCV0iK5W4V8Xbn9uBAuwPNyyc7G05XycvzwUunwRFRI4FLO1qmcI+E0+g49c41j5k/aTV1sq5NmofvAI3UKTED0VuWCqwfToJanFu5D/7YI3mu1D18Fskyte/r6pdjnB6KJuq2nJSB6PLdxkq7ISGQjsIjIFGKTWNtDO6adKNQcQt6m/zA/1zzkCDFryFxn/Ves0f1A3lgcHmTvLHOV42K+sQQ82H1ZOd7KxUvQdo63trAUnS2vL++/Vj+xgeV2GYad3x7CdEfkQsUwdOUd7pXAhQrMZqtxa7InfUO5AiORdx3R/VhVa5ss9B6mMsoPNyy3X+fwaJVf2xynR+IKK/hGhU1b++BZLytGny9sFWm3y9dgwN/jT6Qs0alSe2HUAVoLTP1Ae29nVFGYZUupKmhyvLsVa79Sq8Axqys7LGdsrJE4ua/WGBwB5g0qijG4omp3RZBiQBgKH3b8YaxA/OAVxCq0nZKAIgQdlP2rvUXXV/BxMhzZBfdDLa+ufv/8HAcgnPDbJkIwnpH8NNb98d9yod/jUj+IReUCmPuLn6Iyl/8DZ+mXy2qf6bL0IH9DZxjpxGirfy8HFLODl531cwI0CSjxjfg1CaxSWH/qTKpQg5Ih6QTImACYmhj
0CIULhyCaSLz663+yOZrghn7FaezlbqMKkzv2BN0GPIPRiKYv0H8sMPqBXeOvM2VeGuD5UDxiYSfGc49nWazTEgapsBYmHvZz0l/6Ia1iYWvodG/S114UmeItiy2uk7u/BuEHAaweWEsyKE+YI5j5gdceXDQVi+aABUhdXoPqLgvar2grwEes2tbAdFy3b/ZtU+PxQh+0ALIK0y3puYZDK37y4thMxicxNGbHficMVpia4wNPiH82vtnFfwNQSwMEFAAAAAgAs256Wmu8pElzAQAAuAIAACEAAABwZXB0b25pemVyLTAuMS5kaXN0LWluZm8vTUVUQURBVEFtkstOKzEMhvd5Cr9Apz1FYhGJo4NgQSVuoujszYw7Y5hcSBzK8PS4Le2IwmKiyffb8W8nNyTYoODkP6XMwVuYV3Nzi44sRIoSPH9QMgd1Vv0xy+IcpsHCY0dwfwgC7PuwzjCEAhKAMHM/wIp9A6EIrDuuOxB8R8BE4EIW6PmFNCYmyuQF2AOCU0cxBaHguM6Q0cWeIKxUFdJAqcxVcDSJ2KrHTiRmO522LF15qurgpvrFbe509DafzWbmvEgXktpG/4xwFfosxP4LT8gh91YNqlh1X+K/0qqx6onMRY8584pJD7hPoU3oHPsWrtG3Ra2AVT7oSX7zd/It/ppr8nkbcrdcwHnU/t6o2exvFo97+VvKXaSEsimwHNSJ2+XCwjcUSRcv5oFeC+tAJruyFv6enVSnI77kLBZ8cXE4hrnmn9CTrEN6eT/mEX2D2VwOHnWqFnA7r+P9bn4jrQ/NjKzb39uI0r5Wo7V+wXHb2yjk3dszn1BLAwQUAAAACACzbnparo7441wAAABbAAAAHgAAAHBlcHRvbml6ZXItMC4xLmRpc3QtaW5mby9XSEVFTAXBMQrDMAwF0F2n0NgMMgmhtPgCpVspIZld+LQBIwVZHnL7vrf9gCorvO2mmac00gMKL2GeuSH6EWa18eV2Tfc0DvQ2C3k2eXVH3T+ZwztoKd/MxzmLmkKKnkR/UEsDBBQAAAAIALNuelrXjLIWDQAAAAsAAAAmAAAAcGVwdG9uaXplci0wLjEuZGlzdC1pbmZvL3RvcF9sZXZlbC50eHQrSC0oyc/LrEot4gIAUEsDBBQAAAAIALNuelo3rUXHnAQAAKwHAAAfAAAAcGVwdG9uaXplci0wLjEuZGlzdC1pbmZvL1JFQ09SRHXVydKjNhAA4HueBf4xYLZDDtiAjVe8Ar6oBAgQm1jN8vThr8okeJI5UAUcvm5a3U2BiobkeETVDwBwjhsAvoqBqiPI8sKfEQefcdC5TVgWcghNtjvd+bsfRLggK002R3c47Uojt/gjJcjcH8W/GsxhOtQIhBX2QY1g5UUzmOisG+6kO3jmizQ/IzczsNMLtSWrSmk9ktNxNLN13l/PF4rlJPFDrio4gLbBaT0TQ9Om22ilder69dQ5Ln8eDu8hsIVVXwo6NxiLrrgOQXyDhBIWwhz0SP4m6QSSHDQVQnNV9C6slLP0a/RoPnry+3EVyE5uWoZODlfZ06By2y7VJXpQAitIcxb1TQW9BjSwn+DaIxWaJ8yM4u5dXi32MfABJy/NY3IyaPPcmqy55niz5IznVlWUXOwojuU+Mg4ml1RTbWERgRDlqILf2c/027vR0apWICbJtWKbWL+IoStX7BrpchSOmq/Yrnc8mdyFYhaT/8GjxovA9wvsoxrA3AcBThtUfX8LnEUZmJMEHzbqN94i4L1EChbH18MN7gclklJu5x1ilJanRWx0UxR2MQ+Sey4GFcyTeVFU7QFDDeeCJl/06O0wMDjhE7nVO79Lx71kKArMG5XbSjW15Nm5V8CqRtUcu47WOqsjH/f80NnFQF/iEAvyRtkfbH0RXhRHGGIi2CVOKIaRPkpQpKQB04G1afNxaMtqfeQrzRrkMN6n+cNJ3Qv/5Em/2Gb7mxm67el4pZeATyiWWS4/yNLHXjPDbIbsCpgwe7Z
ZOE5sm47Dt/JR7K2z3hJxjJ+wHjJN4LnvIVh+Nm1RkXDKrwYprpvvDpjBz+Ht26isxUG4BWq9Em/pbdXZ61uInZdrCjSt1151aFukUBwjz9kKlS2qG5BN8xt+oJ7MX0YAuld3TMetGzwNmI+iPBwTAw9xxZT9OvA0zGW8Q/Hix8R+dw3w0nZKtMJ5OENLs+U911OHrf2KpW0Djs7BV4K8pnPYr6u3NcYDBMV42S0pVhDZX9Rprv6b6HoErYmfqnL1I3M4nbWuHR7yPa6ZUO9Hy0Vv+Lbqvb70lpTAyB9VbXP8/QQ8kmXTvQenMZvRTSEiR9izQf/W99qRze7a3pTBtghWe8Upbctl0EtljETyKEni2P+jQ/T3SiDZAIKKZKAg7bwtjPAVRH2+3MeifLjotDXe5Gvvogcvnjx49ndqGjjl3TRbQrH84iNIh3AYNb+O6Z57JXa1BXhzMnMA7Gwju7brdidi0SLs4t0WsIeaeb/WCiWL4scgTBcBKSEJjBD0gYtSjAIwdV8Bw1+XTvCq5VHFitSDUjOsUxiBIVUd1qLpwYKVuQwP68IaNjoXUqwkf/ww6MUX8+VPvUzjPCA/jtpdUZW78pPm8bBxnDRW1Nt12sXtMIqMGbBFdGZbQpRHkYhn1btno+9MfyLh97C11bTDP6XOZf1+6pEpLPb5Pdl0YpcI2c50fOBfbrH2ymx/5R/XBi1RMvN7tCEFSNEbpV9N3/zEkwi7MNzY286SrpKhZXeJM1qJvb1QZB2Ky4tOfVw66djW0/r5PX7V1uerSlF//AVQSwECFAMUAAAACABvdFpaxmSz50IBAAC1AgAAFgAAAAAAAAAAAAAApIEAAAAAcGVwdG9uaXplci9fX2luaXRfXy5weVBLAQIUAxQAAAAIAHVFY1ru21C2xwMAAFMJAAAhAAAAAAAAAAAAAACkgXYBAABwZXB0b25pemVyL2FuYWx5c2VfZ3JpZF9zZWFyY2gucHlQSwECFAMUAAAACAB7bF5ZDJXGrvsAAABeAgAAGQAAAAAAAAAAAAAApIF8BQAAcGVwdG9uaXplci9hcnJheV91dGlscy5weVBLAQIUAxQAAAAIAHxJYVk+QFO0tgYAAHwYAAAeAAAAAAAAAAAAAACkga4GAABwZXB0b25pemVyL2NvbnZvbHV0aW9uX3RyZWUucHlQSwECFAMUAAAACABvdFpapnWa/3MDAACkDAAAIgAAAAAAAAAAAAAApIGgDQAAcGVwdG9uaXplci9leHRyYWN0X3RheG9uX3Njb3Jlcy5weVBLAQIUAxQAAAAIAClFe1mwXj2OUwsAAFMoAAAlAAAAAAAAAAAAAACkgVMRAABwZXB0b25pemVyL2ZhY3Rvcl9ncmFwaF9nZW5lcmF0aW9uLnB5UEsBAhQDFAAAAAgARFBbWur73GzXAQAA/AMAACwAAAAAAAAAAAAAAKSB6RwAAHBlcHRvbml6ZXIvZmV0Y2hfcGVwdGlkZXNfYW5kX2ZpbHRlcl90YXhhLnB5UEsBAhQDFAAAAAgASmt6WstzhGClAAAAxAEAABgAAAAAAAAAAAAAAKSBCh8AAHBlcHRvbml6ZXIvbmNiaV9yYW5rcy5weVBLAQIUAxQAAAAIAHlZYVnnHUDn9QEAAJ8EAAAVAAAAAAAAAAAAAACkgeUfAABwZXB0b25pemVyL3BhcnNlcnMucHlQSwECFAMUAAAACABvdFpa+Hhn2JYDAABgCAAAGgAAAAAAAAAAAAAApIENIgAAcGVwdG9uaXplci9wbG90X3Jlc3VsdHMucHlQSwECFAMUAAAACAB7bF5Zf5w/CzwWAABuWwAAFAAAAAAAAAAAAAAApIHbJQAAcGVwdG9uaXplci9wcWRpY3QucHlQSwECFAMUAAAACABsck5ZZHmhUMMAAAA/AQAAHwAAAAAAAAAAAAAApIFJPAAAcGVwdG9uaXplci9wcm9ncmVzc19saXN0ZW5lci5weVBLAQI
UAxQAAAAIAHxJYVnQV+u6RAEAAEECAAAdAAAAAAAAAAAAAACkgUk9AABwZXB0b25pemVyL3JlcXVlc3RfbWFuYWdlci5weVBLAQIUAxQAAAAIACtwdVlfZmYyAQMAAHAKAAAdAAAAAAAAAAAAAACkgcg+AABwZXB0b25pemVyL3RheGFfY2x1c3RlcmluZy5weVBLAQIUAxQAAAAIAA5pelpr99+7RAYAADQYAAAbAAAAAAAAAAAAAACkgQRCAABwZXB0b25pemVyL3RheG9uX21hbmFnZXIucHlQSwECFAMUAAAACACSc2Ra4451o30IAACAIgAAIgAAAAAAAAAAAAAApIGBSAAAcGVwdG9uaXplci91bmlwZXB0X2NvbW11bmljYXRvci5weVBLAQIUAxQAAAAIABl0WlrnbSnq+gMAAMYJAAAsAAAAAAAAAAAAAACkgT5RAABwZXB0b25pemVyL3VuaXBlcHRfZ2V0X3RheG9ub215X2Zyb21fcG91dC5weVBLAQIUAxQAAAAIAKpuelo2ANzsagwAAC0mAAAZAAAAAAAAAAAAAACkgYJVAABwZXB0b25pemVyL3dlaWdodF90YXhhLnB5UEsBAhQDFAAAAAgAl0FkWo58KglYGQAAQXEAAC8AAAAAAAAAAAAAAKSBI2IAAHBlcHRvbml6ZXIvemVyb19sb29rYWhlYWRfYmVsaWVmX3Byb3BhZ2F0aW9uLnB5UEsBAhQDFAAAAAgAs256Wmu8pElzAQAAuAIAACEAAAAAAAAAAAAAAKSByHsAAHBlcHRvbml6ZXItMC4xLmRpc3QtaW5mby9NRVRBREFUQVBLAQIUAxQAAAAIALNuelqujvjjXAAAAFsAAAAeAAAAAAAAAAAAAACkgXp9AABwZXB0b25pemVyLTAuMS5kaXN0LWluZm8vV0hFRUxQSwECFAMUAAAACACzbnpa14yyFg0AAAALAAAAJgAAAAAAAAAAAAAApIESfgAAcGVwdG9uaXplci0wLjEuZGlzdC1pbmZvL3RvcF9sZXZlbC50eHRQSwECFAMUAAAACACzbnpaN61Fx5wEAACsBwAAHwAAAAAAAAAAAAAAtIFjfgAAcGVwdG9uaXplci0wLjEuZGlzdC1pbmZvL1JFQ09SRFBLBQYAAAAAFwAXAOwGAAA8gwAAAAA= \ No newline at end of file diff --git a/peptonizer_ts/src/workers/lib/perform_taxa_weighing.py b/peptonizer_ts/src/workers/lib/perform_taxa_weighing.py deleted file mode 100644 index 3003f03..0000000 --- a/peptonizer_ts/src/workers/lib/perform_taxa_weighing.py +++ /dev/null @@ -1,22 +0,0 @@ -import peptonizer - -# The PSM input should be provided to the parser as a list of strings -pep_scores = globals().get('peptides_scores') -pep_counts = globals().get('peptides_counts') -rank = globals().get('rank') -taxa_in_graph = globals().get('taxa_in_graph') -peptides_taxa = globals().get('peptides_taxa') - -# Infer the taxa weights for these peptide sequences -sequence_scores_df, taxa_weights_df = peptonizer.perform_taxa_weighing( - peptides_taxa, - pep_scores, - pep_counts, - taxa_in_graph, - 
peptonizer.UnipeptCommunicator(), - rank -) - -# Return a CSV-representation of the taxa_weights dataframe -output = [sequence_scores_df.to_csv(), taxa_weights_df.to_csv()] -output diff --git a/peptonizer_ts/vite.config.js b/peptonizer_ts/vite.config.js index d8ca7ab..abe763a 100644 --- a/peptonizer_ts/vite.config.js +++ b/peptonizer_ts/vite.config.js @@ -1,6 +1,7 @@ import { defineConfig } from "vite"; import path from "path"; import dts from 'vite-plugin-dts'; +import wasm from 'vite-plugin-wasm'; export default defineConfig({ assetsInclude: ['**/*.py', '**/*.whl'], @@ -14,15 +15,6 @@ export default defineConfig({ formats: ['es'], // Switch to ESM format, filename: "peptonizer.js" }, - rollupOptions: { - // Externalize dependencies you don't want to bundle - external: ['pyodide'], // Example, add others like 'react' if neededp - output: { - globals: { - pyodide: 'Pyodide', // Define the global name for external libraries - } - } - }, }, plugins: [ // Use `vite-plugin-dts` for type bundling @@ -31,6 +23,6 @@ export default defineConfig({ insertTypesEntry: true, // Automatically add the "types" field in `package.json` rollupTypes: true, // Enable bundling all `.d.ts` files into a single file }), + wasm(), ], - optimizeDeps: { exclude: ["pyodide"] } }); diff --git a/peptonizer_ts/yarn.lock b/peptonizer_ts/yarn.lock deleted file mode 100644 index 91725c2..0000000 --- a/peptonizer_ts/yarn.lock +++ /dev/null @@ -1,989 +0,0 @@ -# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY. 
-# yarn lockfile v1 - - -"@babel/code-frame@^7.24.2": - version "7.26.2" - resolved "https://registry.yarnpkg.com/@babel/code-frame/-/code-frame-7.26.2.tgz#4b5fab97d33338eff916235055f0ebc21e573a85" - integrity sha512-RJlIHRueQgwWitWgF8OdFYGZX328Ax5BCemNGlqHfplnRT9ESi8JkFlvaVYbS+UubVY6dpv87Fs2u5M29iNFVQ== - dependencies: - "@babel/helper-validator-identifier" "^7.25.9" - js-tokens "^4.0.0" - picocolors "^1.0.0" - -"@babel/helper-string-parser@^7.25.9": - version "7.25.9" - resolved "https://registry.yarnpkg.com/@babel/helper-string-parser/-/helper-string-parser-7.25.9.tgz#1aabb72ee72ed35789b4bbcad3ca2862ce614e8c" - integrity sha512-4A/SCr/2KLd5jrtOMFzaKjVtAei3+2r/NChoBNoZ3EyP/+GlhoaEGoWOZUmFmoITP7zOJyHIMm+DYRd8o3PvHA== - -"@babel/helper-validator-identifier@^7.25.9": - version "7.25.9" - resolved "https://registry.yarnpkg.com/@babel/helper-validator-identifier/-/helper-validator-identifier-7.25.9.tgz#24b64e2c3ec7cd3b3c547729b8d16871f22cbdc7" - integrity sha512-Ed61U6XJc3CVRfkERJWDz4dJwKe7iLmmJsbOGu9wSloNSFttHV0I8g6UAgb7qnK5ly5bGLPd4oXZlxCdANBOWQ== - -"@babel/parser@^7.25.3": - version "7.26.9" - resolved "https://registry.yarnpkg.com/@babel/parser/-/parser-7.26.9.tgz#d9e78bee6dc80f9efd8f2349dcfbbcdace280fd5" - integrity sha512-81NWa1njQblgZbQHxWHpxxCzNsa3ZwvFqpUg7P+NNUU6f3UU2jBEg4OlF/J6rl8+PQGh1q6/zWScd001YwcA5A== - dependencies: - "@babel/types" "^7.26.9" - -"@babel/types@^7.26.9": - version "7.26.9" - resolved "https://registry.yarnpkg.com/@babel/types/-/types-7.26.9.tgz#08b43dec79ee8e682c2ac631c010bdcac54a21ce" - integrity sha512-Y3IR1cRnOxOCDvMmNiym7XpXQ93iGDDPHx+Zj+NM+rg0fBaShfQLkg+hKPaZCEvg5N/LeCo4+Rj/i3FuJsIQaw== - dependencies: - "@babel/helper-string-parser" "^7.25.9" - "@babel/helper-validator-identifier" "^7.25.9" - -"@esbuild/aix-ppc64@0.25.0": - version "0.25.0" - resolved "https://registry.yarnpkg.com/@esbuild/aix-ppc64/-/aix-ppc64-0.25.0.tgz#499600c5e1757a524990d5d92601f0ac3ce87f64" - integrity 
sha512-O7vun9Sf8DFjH2UtqK8Ku3LkquL9SZL8OLY1T5NZkA34+wG3OQF7cl4Ql8vdNzM6fzBbYfLaiRLIOZ+2FOCgBQ== - -"@esbuild/android-arm64@0.25.0": - version "0.25.0" - resolved "https://registry.yarnpkg.com/@esbuild/android-arm64/-/android-arm64-0.25.0.tgz#b9b8231561a1dfb94eb31f4ee056b92a985c324f" - integrity sha512-grvv8WncGjDSyUBjN9yHXNt+cq0snxXbDxy5pJtzMKGmmpPxeAmAhWxXI+01lU5rwZomDgD3kJwulEnhTRUd6g== - -"@esbuild/android-arm@0.25.0": - version "0.25.0" - resolved "https://registry.yarnpkg.com/@esbuild/android-arm/-/android-arm-0.25.0.tgz#ca6e7888942505f13e88ac9f5f7d2a72f9facd2b" - integrity sha512-PTyWCYYiU0+1eJKmw21lWtC+d08JDZPQ5g+kFyxP0V+es6VPPSUhM6zk8iImp2jbV6GwjX4pap0JFbUQN65X1g== - -"@esbuild/android-x64@0.25.0": - version "0.25.0" - resolved "https://registry.yarnpkg.com/@esbuild/android-x64/-/android-x64-0.25.0.tgz#e765ea753bac442dfc9cb53652ce8bd39d33e163" - integrity sha512-m/ix7SfKG5buCnxasr52+LI78SQ+wgdENi9CqyCXwjVR2X4Jkz+BpC3le3AoBPYTC9NHklwngVXvbJ9/Akhrfg== - -"@esbuild/darwin-arm64@0.25.0": - version "0.25.0" - resolved "https://registry.yarnpkg.com/@esbuild/darwin-arm64/-/darwin-arm64-0.25.0.tgz#fa394164b0d89d4fdc3a8a21989af70ef579fa2c" - integrity sha512-mVwdUb5SRkPayVadIOI78K7aAnPamoeFR2bT5nszFUZ9P8UpK4ratOdYbZZXYSqPKMHfS1wdHCJk1P1EZpRdvw== - -"@esbuild/darwin-x64@0.25.0": - version "0.25.0" - resolved "https://registry.yarnpkg.com/@esbuild/darwin-x64/-/darwin-x64-0.25.0.tgz#91979d98d30ba6e7d69b22c617cc82bdad60e47a" - integrity sha512-DgDaYsPWFTS4S3nWpFcMn/33ZZwAAeAFKNHNa1QN0rI4pUjgqf0f7ONmXf6d22tqTY+H9FNdgeaAa+YIFUn2Rg== - -"@esbuild/freebsd-arm64@0.25.0": - version "0.25.0" - resolved "https://registry.yarnpkg.com/@esbuild/freebsd-arm64/-/freebsd-arm64-0.25.0.tgz#b97e97073310736b430a07b099d837084b85e9ce" - integrity sha512-VN4ocxy6dxefN1MepBx/iD1dH5K8qNtNe227I0mnTRjry8tj5MRk4zprLEdG8WPyAPb93/e4pSgi1SoHdgOa4w== - -"@esbuild/freebsd-x64@0.25.0": - version "0.25.0" - resolved 
"https://registry.yarnpkg.com/@esbuild/freebsd-x64/-/freebsd-x64-0.25.0.tgz#f3b694d0da61d9910ec7deff794d444cfbf3b6e7" - integrity sha512-mrSgt7lCh07FY+hDD1TxiTyIHyttn6vnjesnPoVDNmDfOmggTLXRv8Id5fNZey1gl/V2dyVK1VXXqVsQIiAk+A== - -"@esbuild/linux-arm64@0.25.0": - version "0.25.0" - resolved "https://registry.yarnpkg.com/@esbuild/linux-arm64/-/linux-arm64-0.25.0.tgz#f921f699f162f332036d5657cad9036f7a993f73" - integrity sha512-9QAQjTWNDM/Vk2bgBl17yWuZxZNQIF0OUUuPZRKoDtqF2k4EtYbpyiG5/Dk7nqeK6kIJWPYldkOcBqjXjrUlmg== - -"@esbuild/linux-arm@0.25.0": - version "0.25.0" - resolved "https://registry.yarnpkg.com/@esbuild/linux-arm/-/linux-arm-0.25.0.tgz#cc49305b3c6da317c900688995a4050e6cc91ca3" - integrity sha512-vkB3IYj2IDo3g9xX7HqhPYxVkNQe8qTK55fraQyTzTX/fxaDtXiEnavv9geOsonh2Fd2RMB+i5cbhu2zMNWJwg== - -"@esbuild/linux-ia32@0.25.0": - version "0.25.0" - resolved "https://registry.yarnpkg.com/@esbuild/linux-ia32/-/linux-ia32-0.25.0.tgz#3e0736fcfab16cff042dec806247e2c76e109e19" - integrity sha512-43ET5bHbphBegyeqLb7I1eYn2P/JYGNmzzdidq/w0T8E2SsYL1U6un2NFROFRg1JZLTzdCoRomg8Rvf9M6W6Gg== - -"@esbuild/linux-loong64@0.25.0": - version "0.25.0" - resolved "https://registry.yarnpkg.com/@esbuild/linux-loong64/-/linux-loong64-0.25.0.tgz#ea2bf730883cddb9dfb85124232b5a875b8020c7" - integrity sha512-fC95c/xyNFueMhClxJmeRIj2yrSMdDfmqJnyOY4ZqsALkDrrKJfIg5NTMSzVBr5YW1jf+l7/cndBfP3MSDpoHw== - -"@esbuild/linux-mips64el@0.25.0": - version "0.25.0" - resolved "https://registry.yarnpkg.com/@esbuild/linux-mips64el/-/linux-mips64el-0.25.0.tgz#4cababb14eede09248980a2d2d8b966464294ff1" - integrity sha512-nkAMFju7KDW73T1DdH7glcyIptm95a7Le8irTQNO/qtkoyypZAnjchQgooFUDQhNAy4iu08N79W4T4pMBwhPwQ== - -"@esbuild/linux-ppc64@0.25.0": - version "0.25.0" - resolved "https://registry.yarnpkg.com/@esbuild/linux-ppc64/-/linux-ppc64-0.25.0.tgz#8860a4609914c065373a77242e985179658e1951" - integrity sha512-NhyOejdhRGS8Iwv+KKR2zTq2PpysF9XqY+Zk77vQHqNbo/PwZCzB5/h7VGuREZm1fixhs4Q/qWRSi5zmAiO4Fw== - 
-"@esbuild/linux-riscv64@0.25.0": - version "0.25.0" - resolved "https://registry.yarnpkg.com/@esbuild/linux-riscv64/-/linux-riscv64-0.25.0.tgz#baf26e20bb2d38cfb86ee282dff840c04f4ed987" - integrity sha512-5S/rbP5OY+GHLC5qXp1y/Mx//e92L1YDqkiBbO9TQOvuFXM+iDqUNG5XopAnXoRH3FjIUDkeGcY1cgNvnXp/kA== - -"@esbuild/linux-s390x@0.25.0": - version "0.25.0" - resolved "https://registry.yarnpkg.com/@esbuild/linux-s390x/-/linux-s390x-0.25.0.tgz#8323afc0d6cb1b6dc6e9fd21efd9e1542c3640a4" - integrity sha512-XM2BFsEBz0Fw37V0zU4CXfcfuACMrppsMFKdYY2WuTS3yi8O1nFOhil/xhKTmE1nPmVyvQJjJivgDT+xh8pXJA== - -"@esbuild/linux-x64@0.25.0": - version "0.25.0" - resolved "https://registry.yarnpkg.com/@esbuild/linux-x64/-/linux-x64-0.25.0.tgz#08fcf60cb400ed2382e9f8e0f5590bac8810469a" - integrity sha512-9yl91rHw/cpwMCNytUDxwj2XjFpxML0y9HAOH9pNVQDpQrBxHy01Dx+vaMu0N1CKa/RzBD2hB4u//nfc+Sd3Cw== - -"@esbuild/netbsd-arm64@0.25.0": - version "0.25.0" - resolved "https://registry.yarnpkg.com/@esbuild/netbsd-arm64/-/netbsd-arm64-0.25.0.tgz#935c6c74e20f7224918fbe2e6c6fe865b6c6ea5b" - integrity sha512-RuG4PSMPFfrkH6UwCAqBzauBWTygTvb1nxWasEJooGSJ/NwRw7b2HOwyRTQIU97Hq37l3npXoZGYMy3b3xYvPw== - -"@esbuild/netbsd-x64@0.25.0": - version "0.25.0" - resolved "https://registry.yarnpkg.com/@esbuild/netbsd-x64/-/netbsd-x64-0.25.0.tgz#414677cef66d16c5a4d210751eb2881bb9c1b62b" - integrity sha512-jl+qisSB5jk01N5f7sPCsBENCOlPiS/xptD5yxOx2oqQfyourJwIKLRA2yqWdifj3owQZCL2sn6o08dBzZGQzA== - -"@esbuild/openbsd-arm64@0.25.0": - version "0.25.0" - resolved "https://registry.yarnpkg.com/@esbuild/openbsd-arm64/-/openbsd-arm64-0.25.0.tgz#8fd55a4d08d25cdc572844f13c88d678c84d13f7" - integrity sha512-21sUNbq2r84YE+SJDfaQRvdgznTD8Xc0oc3p3iW/a1EVWeNj/SdUCbm5U0itZPQYRuRTW20fPMWMpcrciH2EJw== - -"@esbuild/openbsd-x64@0.25.0": - version "0.25.0" - resolved "https://registry.yarnpkg.com/@esbuild/openbsd-x64/-/openbsd-x64-0.25.0.tgz#0c48ddb1494bbc2d6bcbaa1429a7f465fa1dedde" - integrity 
sha512-2gwwriSMPcCFRlPlKx3zLQhfN/2WjJ2NSlg5TKLQOJdV0mSxIcYNTMhk3H3ulL/cak+Xj0lY1Ym9ysDV1igceg== - -"@esbuild/sunos-x64@0.25.0": - version "0.25.0" - resolved "https://registry.yarnpkg.com/@esbuild/sunos-x64/-/sunos-x64-0.25.0.tgz#86ff9075d77962b60dd26203d7352f92684c8c92" - integrity sha512-bxI7ThgLzPrPz484/S9jLlvUAHYMzy6I0XiU1ZMeAEOBcS0VePBFxh1JjTQt3Xiat5b6Oh4x7UC7IwKQKIJRIg== - -"@esbuild/win32-arm64@0.25.0": - version "0.25.0" - resolved "https://registry.yarnpkg.com/@esbuild/win32-arm64/-/win32-arm64-0.25.0.tgz#849c62327c3229467f5b5cd681bf50588442e96c" - integrity sha512-ZUAc2YK6JW89xTbXvftxdnYy3m4iHIkDtK3CLce8wg8M2L+YZhIvO1DKpxrd0Yr59AeNNkTiic9YLf6FTtXWMw== - -"@esbuild/win32-ia32@0.25.0": - version "0.25.0" - resolved "https://registry.yarnpkg.com/@esbuild/win32-ia32/-/win32-ia32-0.25.0.tgz#f62eb480cd7cca088cb65bb46a6db25b725dc079" - integrity sha512-eSNxISBu8XweVEWG31/JzjkIGbGIJN/TrRoiSVZwZ6pkC6VX4Im/WV2cz559/TXLcYbcrDN8JtKgd9DJVIo8GA== - -"@esbuild/win32-x64@0.25.0": - version "0.25.0" - resolved "https://registry.yarnpkg.com/@esbuild/win32-x64/-/win32-x64-0.25.0.tgz#c8e119a30a7c8d60b9d2e22d2073722dde3b710b" - integrity sha512-ZENoHJBxA20C2zFzh6AI4fT6RraMzjYw4xKWemRTRmRVtN9c5DcH9r/f2ihEkMjOW5eGgrwCslG/+Y/3bL+DHQ== - -"@jridgewell/sourcemap-codec@^1.5.0": - version "1.5.0" - resolved "https://registry.yarnpkg.com/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.0.tgz#3188bcb273a414b0d215fd22a58540b989b9409a" - integrity sha512-gv3ZRaISU3fjPAgNsriBRqGWQL6quFx04YMPW/zD8XMLsU32mhCCbfbO6KZFLjvYpCZ8zyDEgqsgf+PwPaM7GQ== - -"@microsoft/api-extractor-model@7.30.3": - version "7.30.3" - resolved "https://registry.yarnpkg.com/@microsoft/api-extractor-model/-/api-extractor-model-7.30.3.tgz#d1256b6955c8c2a1115e0cfe99e1e8f9802e52cc" - integrity sha512-yEAvq0F78MmStXdqz9TTT4PZ05Xu5R8nqgwI5xmUmQjWBQ9E6R2n8HB/iZMRciG4rf9iwI2mtuQwIzDXBvHn1w== - dependencies: - "@microsoft/tsdoc" "~0.15.1" - "@microsoft/tsdoc-config" "~0.17.1" - "@rushstack/node-core-library" "5.11.0" - 
-"@microsoft/api-extractor@^7.50.1": - version "7.51.1" - resolved "https://registry.yarnpkg.com/@microsoft/api-extractor/-/api-extractor-7.51.1.tgz#fdedb120c0905de4d3c71572a1af6c5d69019c08" - integrity sha512-VoFvIeYXme8QctXDkixy1KIn750kZaFy2snAEOB3nhDFfbBcJNEcvBrpCIQIV09MqI4g9egKUkg+/12WMRC77w== - dependencies: - "@microsoft/api-extractor-model" "7.30.3" - "@microsoft/tsdoc" "~0.15.1" - "@microsoft/tsdoc-config" "~0.17.1" - "@rushstack/node-core-library" "5.11.0" - "@rushstack/rig-package" "0.5.3" - "@rushstack/terminal" "0.15.0" - "@rushstack/ts-command-line" "4.23.5" - lodash "~4.17.15" - minimatch "~3.0.3" - resolve "~1.22.1" - semver "~7.5.4" - source-map "~0.6.1" - typescript "5.7.3" - -"@microsoft/tsdoc-config@~0.17.1": - version "0.17.1" - resolved "https://registry.yarnpkg.com/@microsoft/tsdoc-config/-/tsdoc-config-0.17.1.tgz#e0f0b50628f4ad7fe121ca616beacfe6a25b9335" - integrity sha512-UtjIFe0C6oYgTnad4q1QP4qXwLhe6tIpNTRStJ2RZEPIkqQPREAwE5spzVxsdn9UaEMUqhh0AqSx3X4nWAKXWw== - dependencies: - "@microsoft/tsdoc" "0.15.1" - ajv "~8.12.0" - jju "~1.4.0" - resolve "~1.22.2" - -"@microsoft/tsdoc@0.15.1", "@microsoft/tsdoc@~0.15.1": - version "0.15.1" - resolved "https://registry.yarnpkg.com/@microsoft/tsdoc/-/tsdoc-0.15.1.tgz#d4f6937353bc4568292654efb0a0e0532adbcba2" - integrity sha512-4aErSrCR/On/e5G2hDP0wjooqDdauzEbIq8hIkIe5pXV0rtWJZvdCEKL0ykZxex+IxIwBp0eGeV48hQN07dXtw== - -"@rollup/pluginutils@^5.1.4": - version "5.1.4" - resolved "https://registry.yarnpkg.com/@rollup/pluginutils/-/pluginutils-5.1.4.tgz#bb94f1f9eaaac944da237767cdfee6c5b2262d4a" - integrity sha512-USm05zrsFxYLPdWWq+K3STlWiT/3ELn3RcV5hJMghpeAIhxfsUIg6mt12CBJBInWMV4VneoV7SfGv8xIwo2qNQ== - dependencies: - "@types/estree" "^1.0.0" - estree-walker "^2.0.2" - picomatch "^4.0.2" - -"@rollup/rollup-android-arm-eabi@4.34.9": - version "4.34.9" - resolved "https://registry.yarnpkg.com/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.34.9.tgz#661a45a4709c70e59e596ec78daa9cb8b8d27604" - 
integrity sha512-qZdlImWXur0CFakn2BJ2znJOdqYZKiedEPEVNTBrpfPjc/YuTGcaYZcdmNFTkUj3DU0ZM/AElcM8Ybww3xVLzA== - -"@rollup/rollup-android-arm64@4.34.9": - version "4.34.9" - resolved "https://registry.yarnpkg.com/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.34.9.tgz#128fe8dd510d880cf98b4cb6c7add326815a0c4b" - integrity sha512-4KW7P53h6HtJf5Y608T1ISKvNIYLWRKMvfnG0c44M6In4DQVU58HZFEVhWINDZKp7FZps98G3gxwC1sb0wXUUg== - -"@rollup/rollup-darwin-arm64@4.34.9": - version "4.34.9" - resolved "https://registry.yarnpkg.com/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.34.9.tgz#363467bc49fd0b1e17075798ac8e9ad1e1e29535" - integrity sha512-0CY3/K54slrzLDjOA7TOjN1NuLKERBgk9nY5V34mhmuu673YNb+7ghaDUs6N0ujXR7fz5XaS5Aa6d2TNxZd0OQ== - -"@rollup/rollup-darwin-x64@4.34.9": - version "4.34.9" - resolved "https://registry.yarnpkg.com/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.34.9.tgz#c2fe3d85fffe47f0ed0f076b3563ada22c8af19c" - integrity sha512-eOojSEAi/acnsJVYRxnMkPFqcxSMFfrw7r2iD9Q32SGkb/Q9FpUY1UlAu1DH9T7j++gZ0lHjnm4OyH2vCI7l7Q== - -"@rollup/rollup-freebsd-arm64@4.34.9": - version "4.34.9" - resolved "https://registry.yarnpkg.com/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.34.9.tgz#d95bd8f6eaaf829781144fc8bd2d5d71d9f6a9f5" - integrity sha512-2lzjQPJbN5UnHm7bHIUKFMulGTQwdvOkouJDpPysJS+QFBGDJqcfh+CxxtG23Ik/9tEvnebQiylYoazFMAgrYw== - -"@rollup/rollup-freebsd-x64@4.34.9": - version "4.34.9" - resolved "https://registry.yarnpkg.com/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.34.9.tgz#c3576c6011656e4966ded29f051edec636b44564" - integrity sha512-SLl0hi2Ah2H7xQYd6Qaiu01kFPzQ+hqvdYSoOtHYg/zCIFs6t8sV95kaoqjzjFwuYQLtOI0RZre/Ke0nPaQV+g== - -"@rollup/rollup-linux-arm-gnueabihf@4.34.9": - version "4.34.9" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.34.9.tgz#48c87d0dee4f8dc9591a416717f91b4a89d77e3d" - integrity sha512-88I+D3TeKItrw+Y/2ud4Tw0+3CxQ2kLgu3QvrogZ0OfkmX/DEppehus7L3TS2Q4lpB+hYyxhkQiYPJ6Mf5/dPg== - 
-"@rollup/rollup-linux-arm-musleabihf@4.34.9": - version "4.34.9" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.34.9.tgz#f4c4e7c03a7767f2e5aa9d0c5cfbf5c0f59f2d41" - integrity sha512-3qyfWljSFHi9zH0KgtEPG4cBXHDFhwD8kwg6xLfHQ0IWuH9crp005GfoUUh/6w9/FWGBwEHg3lxK1iHRN1MFlA== - -"@rollup/rollup-linux-arm64-gnu@4.34.9": - version "4.34.9" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.34.9.tgz#1015c9d07a99005025d13b8622b7600029d0b52f" - integrity sha512-6TZjPHjKZUQKmVKMUowF3ewHxctrRR09eYyvT5eFv8w/fXarEra83A2mHTVJLA5xU91aCNOUnM+DWFMSbQ0Nxw== - -"@rollup/rollup-linux-arm64-musl@4.34.9": - version "4.34.9" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.34.9.tgz#8f895eb5577748fc75af21beae32439626e0a14c" - integrity sha512-LD2fytxZJZ6xzOKnMbIpgzFOuIKlxVOpiMAXawsAZ2mHBPEYOnLRK5TTEsID6z4eM23DuO88X0Tq1mErHMVq0A== - -"@rollup/rollup-linux-loongarch64-gnu@4.34.9": - version "4.34.9" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-loongarch64-gnu/-/rollup-linux-loongarch64-gnu-4.34.9.tgz#c9cd5dbbdc6b3ca4dbeeb0337498cf31949004a0" - integrity sha512-dRAgTfDsn0TE0HI6cmo13hemKpVHOEyeciGtvlBTkpx/F65kTvShtY/EVyZEIfxFkV5JJTuQ9tP5HGBS0hfxIg== - -"@rollup/rollup-linux-powerpc64le-gnu@4.34.9": - version "4.34.9" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-powerpc64le-gnu/-/rollup-linux-powerpc64le-gnu-4.34.9.tgz#7ebb5b4441faa17843a210f7d0583a20c93b40e4" - integrity sha512-PHcNOAEhkoMSQtMf+rJofwisZqaU8iQ8EaSps58f5HYll9EAY5BSErCZ8qBDMVbq88h4UxaNPlbrKqfWP8RfJA== - -"@rollup/rollup-linux-riscv64-gnu@4.34.9": - version "4.34.9" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.34.9.tgz#10f5d7349fbd2fe78f9e36ecc90aab3154435c8d" - integrity sha512-Z2i0Uy5G96KBYKjeQFKbbsB54xFOL5/y1P5wNBsbXB8yE+At3oh0DVMjQVzCJRJSfReiB2tX8T6HUFZ2k8iaKg== - 
-"@rollup/rollup-linux-s390x-gnu@4.34.9": - version "4.34.9" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.34.9.tgz#196347d2fa20593ab09d0b7e2589fb69bdf742c6" - integrity sha512-U+5SwTMoeYXoDzJX5dhDTxRltSrIax8KWwfaaYcynuJw8mT33W7oOgz0a+AaXtGuvhzTr2tVKh5UO8GVANTxyQ== - -"@rollup/rollup-linux-x64-gnu@4.34.9": - version "4.34.9" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.34.9.tgz#7193cbd8d128212b8acda37e01b39d9e96259ef8" - integrity sha512-FwBHNSOjUTQLP4MG7y6rR6qbGw4MFeQnIBrMe161QGaQoBQLqSUEKlHIiVgF3g/mb3lxlxzJOpIBhaP+C+KP2A== - -"@rollup/rollup-linux-x64-musl@4.34.9": - version "4.34.9" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.34.9.tgz#29a6867278ca0420b891574cfab98ecad70c59d1" - integrity sha512-cYRpV4650z2I3/s6+5/LONkjIz8MBeqrk+vPXV10ORBnshpn8S32bPqQ2Utv39jCiDcO2eJTuSlPXpnvmaIgRA== - -"@rollup/rollup-win32-arm64-msvc@4.34.9": - version "4.34.9" - resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.34.9.tgz#89427dcac0c8e3a6d32b13a03a296a275d0de9a9" - integrity sha512-z4mQK9dAN6byRA/vsSgQiPeuO63wdiDxZ9yg9iyX2QTzKuQM7T4xlBoeUP/J8uiFkqxkcWndWi+W7bXdPbt27Q== - -"@rollup/rollup-win32-ia32-msvc@4.34.9": - version "4.34.9" - resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.34.9.tgz#ecb9711ba2b6d2bf6ee51265abe057ab90913deb" - integrity sha512-KB48mPtaoHy1AwDNkAJfHXvHp24H0ryZog28spEs0V48l3H1fr4i37tiyHsgKZJnCmvxsbATdZGBpbmxTE3a9w== - -"@rollup/rollup-win32-x64-msvc@4.34.9": - version "4.34.9" - resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.34.9.tgz#1973871850856ae72bc678aeb066ab952330e923" - integrity sha512-AyleYRPU7+rgkMWbEh71fQlrzRfeP6SyMnRf9XX4fCdDPAJumdSBqYEcWPMzVQ4ScAl7E4oFfK0GUVn77xSwbw== - -"@rushstack/node-core-library@5.11.0": - version "5.11.0" - resolved 
"https://registry.yarnpkg.com/@rushstack/node-core-library/-/node-core-library-5.11.0.tgz#8ceb980f3a591e1254167bb5ffdc200d1893b783" - integrity sha512-I8+VzG9A0F3nH2rLpPd7hF8F7l5Xb7D+ldrWVZYegXM6CsKkvWc670RlgK3WX8/AseZfXA/vVrh0bpXe2Y2UDQ== - dependencies: - ajv "~8.13.0" - ajv-draft-04 "~1.0.0" - ajv-formats "~3.0.1" - fs-extra "~11.3.0" - import-lazy "~4.0.0" - jju "~1.4.0" - resolve "~1.22.1" - semver "~7.5.4" - -"@rushstack/rig-package@0.5.3": - version "0.5.3" - resolved "https://registry.yarnpkg.com/@rushstack/rig-package/-/rig-package-0.5.3.tgz#ea4d8a3458540b1295500149c04e645f23134e5d" - integrity sha512-olzSSjYrvCNxUFZowevC3uz8gvKr3WTpHQ7BkpjtRpA3wK+T0ybep/SRUMfr195gBzJm5gaXw0ZMgjIyHqJUow== - dependencies: - resolve "~1.22.1" - strip-json-comments "~3.1.1" - -"@rushstack/terminal@0.15.0": - version "0.15.0" - resolved "https://registry.yarnpkg.com/@rushstack/terminal/-/terminal-0.15.0.tgz#9599c6684ed2adc9e74b767a2d45159975bcddb8" - integrity sha512-vXQPRQ+vJJn4GVqxkwRe+UGgzNxdV8xuJZY2zem46Y0p3tlahucH9/hPmLGj2i9dQnUBFiRnoM9/KW7PYw8F4Q== - dependencies: - "@rushstack/node-core-library" "5.11.0" - supports-color "~8.1.1" - -"@rushstack/ts-command-line@4.23.5": - version "4.23.5" - resolved "https://registry.yarnpkg.com/@rushstack/ts-command-line/-/ts-command-line-4.23.5.tgz#523e0db8906d831a730f15acc15fbcf7906c650a" - integrity sha512-jg70HfoK44KfSP3MTiL5rxsZH7X1ktX3cZs9Sl8eDu1/LxJSbPsh0MOFRC710lIuYYSgxWjI5AjbCBAl7u3RxA== - dependencies: - "@rushstack/terminal" "0.15.0" - "@types/argparse" "1.0.38" - argparse "~1.0.9" - string-argv "~0.3.1" - -"@types/argparse@1.0.38": - version "1.0.38" - resolved "https://registry.yarnpkg.com/@types/argparse/-/argparse-1.0.38.tgz#a81fd8606d481f873a3800c6ebae4f1d768a56a9" - integrity sha512-ebDJ9b0e702Yr7pWgB0jzm+CX4Srzz8RcXtLJDJB+BSccqMa36uyH/zUsSYao5+BD1ytv3k3rPYCq4mAE1hsXA== - -"@types/async@^3.2.24": - version "3.2.24" - resolved 
"https://registry.yarnpkg.com/@types/async/-/async-3.2.24.tgz#3a96351047575bbcf2340541b2d955a35339608f" - integrity sha512-8iHVLHsCCOBKjCF2KwFe0p9Z3rfM9mL+sSP8btyR5vTjJRAqpBYD28/ZLgXPf0pjG1VxOvtCV/BgXkQbpSe8Hw== - -"@types/estree@1.0.6", "@types/estree@^1.0.0": - version "1.0.6" - resolved "https://registry.yarnpkg.com/@types/estree/-/estree-1.0.6.tgz#628effeeae2064a1b4e79f78e81d87b7e5fc7b50" - integrity sha512-AYnb1nQyY49te+VRAVgmzfcgjYS91mY5P0TKUDCLEM+gNnA+3T6rWITXRLYCpahpqSQbN5cE+gHpnPyXjHWxcw== - -"@volar/language-core@2.4.11", "@volar/language-core@~2.4.11": - version "2.4.11" - resolved "https://registry.yarnpkg.com/@volar/language-core/-/language-core-2.4.11.tgz#d95a9ec4f14fbdb41a6a64f9f321d11d23a5291c" - integrity sha512-lN2C1+ByfW9/JRPpqScuZt/4OrUUse57GLI6TbLgTIqBVemdl1wNcZ1qYGEo2+Gw8coYLgCy7SuKqn6IrQcQgg== - dependencies: - "@volar/source-map" "2.4.11" - -"@volar/source-map@2.4.11": - version "2.4.11" - resolved "https://registry.yarnpkg.com/@volar/source-map/-/source-map-2.4.11.tgz#5876d4531508129724c2755e295db1df98bd5895" - integrity sha512-ZQpmafIGvaZMn/8iuvCFGrW3smeqkq/IIh9F1SdSx9aUl0J4Iurzd6/FhmjNO5g2ejF3rT45dKskgXWiofqlZQ== - -"@volar/typescript@^2.4.11": - version "2.4.11" - resolved "https://registry.yarnpkg.com/@volar/typescript/-/typescript-2.4.11.tgz#aafbfa413337654db211bf4d8fb6670c89f6fa57" - integrity sha512-2DT+Tdh88Spp5PyPbqhyoYavYCPDsqbHLFwcUI9K1NlY1YgUJvujGdrqUp0zWxnW7KWNTr3xSpMuv2WnaTKDAw== - dependencies: - "@volar/language-core" "2.4.11" - path-browserify "^1.0.1" - vscode-uri "^3.0.8" - -"@vue/compiler-core@3.5.13": - version "3.5.13" - resolved "https://registry.yarnpkg.com/@vue/compiler-core/-/compiler-core-3.5.13.tgz#b0ae6c4347f60c03e849a05d34e5bf747c9bda05" - integrity sha512-oOdAkwqUfW1WqpwSYJce06wvt6HljgY3fGeM9NcVA1HaYOij3mZG9Rkysn0OHuyUAGMbEbARIpsG+LPVlBJ5/Q== - dependencies: - "@babel/parser" "^7.25.3" - "@vue/shared" "3.5.13" - entities "^4.5.0" - estree-walker "^2.0.2" - source-map-js "^1.2.0" - -"@vue/compiler-dom@^3.5.0": 
- version "3.5.13" - resolved "https://registry.yarnpkg.com/@vue/compiler-dom/-/compiler-dom-3.5.13.tgz#bb1b8758dbc542b3658dda973b98a1c9311a8a58" - integrity sha512-ZOJ46sMOKUjO3e94wPdCzQ6P1Lx/vhp2RSvfaab88Ajexs0AHeV0uasYhi99WPaogmBlRHNRuly8xV75cNTMDA== - dependencies: - "@vue/compiler-core" "3.5.13" - "@vue/shared" "3.5.13" - -"@vue/compiler-vue2@^2.7.16": - version "2.7.16" - resolved "https://registry.yarnpkg.com/@vue/compiler-vue2/-/compiler-vue2-2.7.16.tgz#2ba837cbd3f1b33c2bc865fbe1a3b53fb611e249" - integrity sha512-qYC3Psj9S/mfu9uVi5WvNZIzq+xnXMhOwbTFKKDD7b1lhpnn71jXSFdTQ+WsIEk0ONCd7VV2IMm7ONl6tbQ86A== - dependencies: - de-indent "^1.0.2" - he "^1.2.0" - -"@vue/language-core@2.2.0": - version "2.2.0" - resolved "https://registry.yarnpkg.com/@vue/language-core/-/language-core-2.2.0.tgz#e48c54584f889f78b120ce10a050dfb316c7fcdf" - integrity sha512-O1ZZFaaBGkKbsRfnVH1ifOK1/1BUkyK+3SQsfnh6PmMmD4qJcTU8godCeA96jjDRTL6zgnK7YzCHfaUlH2r0Mw== - dependencies: - "@volar/language-core" "~2.4.11" - "@vue/compiler-dom" "^3.5.0" - "@vue/compiler-vue2" "^2.7.16" - "@vue/shared" "^3.5.0" - alien-signals "^0.4.9" - minimatch "^9.0.3" - muggle-string "^0.4.1" - path-browserify "^1.0.1" - -"@vue/shared@3.5.13", "@vue/shared@^3.5.0": - version "3.5.13" - resolved "https://registry.yarnpkg.com/@vue/shared/-/shared-3.5.13.tgz#87b309a6379c22b926e696893237826f64339b6f" - integrity sha512-/hnE/qP5ZoGpol0a5mDi45bOd7t3tjYJBjsgCsivow7D48cJeV5l05RD82lPqi7gRiphZM37rnhW1l6ZoCNNnQ== - -acorn@^8.14.0: - version "8.14.0" - resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.14.0.tgz#063e2c70cac5fb4f6467f0b11152e04c682795b0" - integrity sha512-cl669nCJTZBsL97OF4kUQm5g5hC2uihk0NxY3WENAC0TYdILVkAyHymAntgxGkl7K+t0cXIrH5siy5S4XkFycA== - -ajv-draft-04@~1.0.0: - version "1.0.0" - resolved "https://registry.yarnpkg.com/ajv-draft-04/-/ajv-draft-04-1.0.0.tgz#3b64761b268ba0b9e668f0b41ba53fce0ad77fc8" - integrity 
sha512-mv00Te6nmYbRp5DCwclxtt7yV/joXJPGS7nM+97GdxvuttCOfgI3K4U25zboyeX0O+myI8ERluxQe5wljMmVIw== - -ajv-formats@~3.0.1: - version "3.0.1" - resolved "https://registry.yarnpkg.com/ajv-formats/-/ajv-formats-3.0.1.tgz#3d5dc762bca17679c3c2ea7e90ad6b7532309578" - integrity sha512-8iUql50EUR+uUcdRQ3HDqa6EVyo3docL8g5WJ3FNcWmu62IbkGUue/pEyLBW8VGKKucTPgqeks4fIU1DA4yowQ== - dependencies: - ajv "^8.0.0" - -ajv@^8.0.0: - version "8.17.1" - resolved "https://registry.yarnpkg.com/ajv/-/ajv-8.17.1.tgz#37d9a5c776af6bc92d7f4f9510eba4c0a60d11a6" - integrity sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g== - dependencies: - fast-deep-equal "^3.1.3" - fast-uri "^3.0.1" - json-schema-traverse "^1.0.0" - require-from-string "^2.0.2" - -ajv@~8.12.0: - version "8.12.0" - resolved "https://registry.yarnpkg.com/ajv/-/ajv-8.12.0.tgz#d1a0527323e22f53562c567c00991577dfbe19d1" - integrity sha512-sRu1kpcO9yLtYxBKvqfTeh9KzZEwO3STyX1HT+4CaDzC6HpTGYhIhPIzj9XuKU7KYDwnaeh5hcOwjy1QuJzBPA== - dependencies: - fast-deep-equal "^3.1.1" - json-schema-traverse "^1.0.0" - require-from-string "^2.0.2" - uri-js "^4.2.2" - -ajv@~8.13.0: - version "8.13.0" - resolved "https://registry.yarnpkg.com/ajv/-/ajv-8.13.0.tgz#a3939eaec9fb80d217ddf0c3376948c023f28c91" - integrity sha512-PRA911Blj99jR5RMeTunVbNXMF6Lp4vZXnk5GQjcnUWUTsrXtekg/pnmFFI2u/I36Y/2bITGS30GZCXei6uNkA== - dependencies: - fast-deep-equal "^3.1.3" - json-schema-traverse "^1.0.0" - require-from-string "^2.0.2" - uri-js "^4.4.1" - -alien-signals@^0.4.9: - version "0.4.14" - resolved "https://registry.yarnpkg.com/alien-signals/-/alien-signals-0.4.14.tgz#9ff8f72a272300a51692f54bd9bbbada78fbf539" - integrity sha512-itUAVzhczTmP2U5yX67xVpsbbOiquusbWVyA9N+sy6+r6YVbFkahXvNCeEPWEOMhwDYwbVbGHFkVL03N9I5g+Q== - -argparse@~1.0.9: - version "1.0.10" - resolved "https://registry.yarnpkg.com/argparse/-/argparse-1.0.10.tgz#bcd6791ea5ae09725e17e5ad988134cd40b3d911" - integrity 
sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg== - dependencies: - sprintf-js "~1.0.2" - -async@^3.2.6: - version "3.2.6" - resolved "https://registry.yarnpkg.com/async/-/async-3.2.6.tgz#1b0728e14929d51b85b449b7f06e27c1145e38ce" - integrity sha512-htCUDlxyyCLMgaM3xXg0C0LW2xqfuQ6p05pCEIsXuyQ+a1koYKTuBMzRNwmybfLgvJDMd0r1LTn4+E0Ti6C2AA== - -balanced-match@^1.0.0: - version "1.0.2" - resolved "https://registry.yarnpkg.com/balanced-match/-/balanced-match-1.0.2.tgz#e83e3a7e3f300b34cb9d87f615fa0cbf357690ee" - integrity sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw== - -brace-expansion@^1.1.7: - version "1.1.11" - resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.11.tgz#3c7fcbf529d87226f3d2f52b966ff5271eb441dd" - integrity sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA== - dependencies: - balanced-match "^1.0.0" - concat-map "0.0.1" - -brace-expansion@^2.0.1: - version "2.0.1" - resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-2.0.1.tgz#1edc459e0f0c548486ecf9fc99f2221364b9a0ae" - integrity sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA== - dependencies: - balanced-match "^1.0.0" - -compare-versions@^6.1.1: - version "6.1.1" - resolved "https://registry.yarnpkg.com/compare-versions/-/compare-versions-6.1.1.tgz#7af3cc1099ba37d244b3145a9af5201b629148a9" - integrity sha512-4hm4VPpIecmlg59CHXnRDnqGplJFrbLG4aFEl5vl6cK1u76ws3LLvX7ikFnTDl5vo39sjWD6AaDPYodJp/NNHg== - -concat-map@0.0.1: - version "0.0.1" - resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b" - integrity sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg== - -confbox@^0.1.8: - version "0.1.8" - resolved 
"https://registry.yarnpkg.com/confbox/-/confbox-0.1.8.tgz#820d73d3b3c82d9bd910652c5d4d599ef8ff8b06" - integrity sha512-RMtmw0iFkeR4YV+fUOSucriAQNb9g8zFR52MWCtl+cCZOFRNL6zeB395vPzFhEjjn4fMxXudmELnl/KF/WrK6w== - -confbox@^0.2.1: - version "0.2.1" - resolved "https://registry.yarnpkg.com/confbox/-/confbox-0.2.1.tgz#ae39f2c99699afa451d00206479f15f9a1208a8b" - integrity sha512-hkT3yDPFbs95mNCy1+7qNKC6Pro+/ibzYxtM2iqEigpf0sVw+bg4Zh9/snjsBcf990vfIsg5+1U7VyiyBb3etg== - -de-indent@^1.0.2: - version "1.0.2" - resolved "https://registry.yarnpkg.com/de-indent/-/de-indent-1.0.2.tgz#b2038e846dc33baa5796128d0804b455b8c1e21d" - integrity sha512-e/1zu3xH5MQryN2zdVaF0OrdNLUbvWxzMbi+iNA6Bky7l1RoP8a2fIbRocyHclXt/arDrrR6lL3TqFD9pMQTsg== - -debug@^4.4.0: - version "4.4.0" - resolved "https://registry.yarnpkg.com/debug/-/debug-4.4.0.tgz#2b3f2aea2ffeb776477460267377dc8710faba8a" - integrity sha512-6WTZ/IxCY/T6BALoZHaE4ctp9xm+Z5kY/pzYaCHRFeyVhojxlrm+46y68HA6hr0TcwEssoxNiDEUJQjfPZ/RYA== - dependencies: - ms "^2.1.3" - -entities@^4.5.0: - version "4.5.0" - resolved "https://registry.yarnpkg.com/entities/-/entities-4.5.0.tgz#5d268ea5e7113ec74c4d033b79ea5a35a488fb48" - integrity sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw== - -esbuild@^0.25.0: - version "0.25.0" - resolved "https://registry.yarnpkg.com/esbuild/-/esbuild-0.25.0.tgz#0de1787a77206c5a79eeb634a623d39b5006ce92" - integrity sha512-BXq5mqc8ltbaN34cDqWuYKyNhX8D/Z0J1xdtdQ8UcIIIyJyz+ZMKUt58tF3SrZ85jcfN/PZYhjR5uDQAYNVbuw== - optionalDependencies: - "@esbuild/aix-ppc64" "0.25.0" - "@esbuild/android-arm" "0.25.0" - "@esbuild/android-arm64" "0.25.0" - "@esbuild/android-x64" "0.25.0" - "@esbuild/darwin-arm64" "0.25.0" - "@esbuild/darwin-x64" "0.25.0" - "@esbuild/freebsd-arm64" "0.25.0" - "@esbuild/freebsd-x64" "0.25.0" - "@esbuild/linux-arm" "0.25.0" - "@esbuild/linux-arm64" "0.25.0" - "@esbuild/linux-ia32" "0.25.0" - "@esbuild/linux-loong64" "0.25.0" - "@esbuild/linux-mips64el" "0.25.0" - 
"@esbuild/linux-ppc64" "0.25.0" - "@esbuild/linux-riscv64" "0.25.0" - "@esbuild/linux-s390x" "0.25.0" - "@esbuild/linux-x64" "0.25.0" - "@esbuild/netbsd-arm64" "0.25.0" - "@esbuild/netbsd-x64" "0.25.0" - "@esbuild/openbsd-arm64" "0.25.0" - "@esbuild/openbsd-x64" "0.25.0" - "@esbuild/sunos-x64" "0.25.0" - "@esbuild/win32-arm64" "0.25.0" - "@esbuild/win32-ia32" "0.25.0" - "@esbuild/win32-x64" "0.25.0" - -estree-walker@^2.0.2: - version "2.0.2" - resolved "https://registry.yarnpkg.com/estree-walker/-/estree-walker-2.0.2.tgz#52f010178c2a4c117a7757cfe942adb7d2da4cac" - integrity sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w== - -exsolve@^1.0.1: - version "1.0.1" - resolved "https://registry.yarnpkg.com/exsolve/-/exsolve-1.0.1.tgz#67ba83ed65fa1657d5e010a25782c78dbc3e8a42" - integrity sha512-Smf0iQtkQVJLaph8r/qS8C8SWfQkaq9Q/dFcD44MLbJj6DNhlWefVuaS21SjfqOsBbjVlKtbCj6L9ekXK6EZUg== - -fast-deep-equal@^3.1.1, fast-deep-equal@^3.1.3: - version "3.1.3" - resolved "https://registry.yarnpkg.com/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz#3a7d56b559d6cbc3eb512325244e619a65c6c525" - integrity sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q== - -fast-uri@^3.0.1: - version "3.0.6" - resolved "https://registry.yarnpkg.com/fast-uri/-/fast-uri-3.0.6.tgz#88f130b77cfaea2378d56bf970dea21257a68748" - integrity sha512-Atfo14OibSv5wAp4VWNsFYE1AchQRTv9cBGWET4pZWHzYshFSS9NQI6I57rdKn9croWVMbYFbLhJ+yJvmZIIHw== - -fs-extra@~11.3.0: - version "11.3.0" - resolved "https://registry.yarnpkg.com/fs-extra/-/fs-extra-11.3.0.tgz#0daced136bbaf65a555a326719af931adc7a314d" - integrity sha512-Z4XaCL6dUDHfP/jT25jJKMmtxvuwbkrD1vNSMFlo9lNLY2c5FHYSQgHPRZUjAB26TpDEoW9HCOgplrdbaPV/ew== - dependencies: - graceful-fs "^4.2.0" - jsonfile "^6.0.1" - universalify "^2.0.0" - -fsevents@~2.3.2, fsevents@~2.3.3: - version "2.3.3" - resolved 
"https://registry.yarnpkg.com/fsevents/-/fsevents-2.3.3.tgz#cac6407785d03675a2a5e1a5305c697b347d90d6" - integrity sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw== - -function-bind@^1.1.2: - version "1.1.2" - resolved "https://registry.yarnpkg.com/function-bind/-/function-bind-1.1.2.tgz#2c02d864d97f3ea6c8830c464cbd11ab6eab7a1c" - integrity sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA== - -graceful-fs@^4.1.6, graceful-fs@^4.2.0: - version "4.2.11" - resolved "https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.11.tgz#4183e4e8bf08bb6e05bbb2f7d2e0c8f712ca40e3" - integrity sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ== - -has-flag@^4.0.0: - version "4.0.0" - resolved "https://registry.yarnpkg.com/has-flag/-/has-flag-4.0.0.tgz#944771fd9c81c81265c4d6941860da06bb59479b" - integrity sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ== - -hasown@^2.0.2: - version "2.0.2" - resolved "https://registry.yarnpkg.com/hasown/-/hasown-2.0.2.tgz#003eaf91be7adc372e84ec59dc37252cedb80003" - integrity sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ== - dependencies: - function-bind "^1.1.2" - -he@^1.2.0: - version "1.2.0" - resolved "https://registry.yarnpkg.com/he/-/he-1.2.0.tgz#84ae65fa7eafb165fddb61566ae14baf05664f0f" - integrity sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw== - -import-lazy@~4.0.0: - version "4.0.0" - resolved "https://registry.yarnpkg.com/import-lazy/-/import-lazy-4.0.0.tgz#e8eb627483a0a43da3c03f3e35548be5cb0cc153" - integrity sha512-rKtvo6a868b5Hu3heneU+L4yEQ4jYKLtjpnPeUdK7h0yzXGmyBTypknlkCvHFBqfX9YlorEiMM6Dnq/5atfHkw== - -is-core-module@^2.16.0: - version "2.16.1" - resolved 
"https://registry.yarnpkg.com/is-core-module/-/is-core-module-2.16.1.tgz#2a98801a849f43e2add644fbb6bc6229b19a4ef4" - integrity sha512-UfoeMA6fIJ8wTYFEUjelnaGI67v6+N7qXJEvQuIGa99l4xsCruSYOVSQ0uPANn4dAzm8lkYPaKLrrijLq7x23w== - dependencies: - hasown "^2.0.2" - -jju@~1.4.0: - version "1.4.0" - resolved "https://registry.yarnpkg.com/jju/-/jju-1.4.0.tgz#a3abe2718af241a2b2904f84a625970f389ae32a" - integrity sha512-8wb9Yw966OSxApiCt0K3yNJL8pnNeIv+OEq2YMidz4FKP6nonSRoOXc80iXY4JaN2FC11B9qsNmDsm+ZOfMROA== - -js-tokens@^4.0.0: - version "4.0.0" - resolved "https://registry.yarnpkg.com/js-tokens/-/js-tokens-4.0.0.tgz#19203fb59991df98e3a287050d4647cdeaf32499" - integrity sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ== - -json-schema-traverse@^1.0.0: - version "1.0.0" - resolved "https://registry.yarnpkg.com/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz#ae7bcb3656ab77a73ba5c49bf654f38e6b6860e2" - integrity sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug== - -jsonfile@^6.0.1: - version "6.1.0" - resolved "https://registry.yarnpkg.com/jsonfile/-/jsonfile-6.1.0.tgz#bc55b2634793c679ec6403094eb13698a6ec0aae" - integrity sha512-5dgndWOriYSm5cnYaJNhalLNDKOqFwyDB/rr1E9ZsGciGvKPs8R2xYGCacuf3z6K1YKDz182fd+fY3cn3pMqXQ== - dependencies: - universalify "^2.0.0" - optionalDependencies: - graceful-fs "^4.1.6" - -kolorist@^1.8.0: - version "1.8.0" - resolved "https://registry.yarnpkg.com/kolorist/-/kolorist-1.8.0.tgz#edddbbbc7894bc13302cdf740af6374d4a04743c" - integrity sha512-Y+60/zizpJ3HRH8DCss+q95yr6145JXZo46OTpFvDZWLfRCE4qChOyk1b26nMaNpfHHgxagk9dXT5OP0Tfe+dQ== - -local-pkg@^1.0.0: - version "1.1.1" - resolved "https://registry.yarnpkg.com/local-pkg/-/local-pkg-1.1.1.tgz#f5fe74a97a3bd3c165788ee08ca9fbe998dc58dd" - integrity sha512-WunYko2W1NcdfAFpuLUoucsgULmgDBRkdxHxWQ7mK0cQqwPiy8E1enjuRBrhLtZkB5iScJ1XIPdhVEFK8aOLSg== - dependencies: - mlly "^1.7.4" - pkg-types "^2.0.1" - quansync "^0.2.8" - 
-lodash@~4.17.15: - version "4.17.21" - resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c" - integrity sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg== - -lru-cache@^6.0.0: - version "6.0.0" - resolved "https://registry.yarnpkg.com/lru-cache/-/lru-cache-6.0.0.tgz#6d6fe6570ebd96aaf90fcad1dafa3b2566db3a94" - integrity sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA== - dependencies: - yallist "^4.0.0" - -magic-string@^0.30.10, magic-string@^0.30.17: - version "0.30.17" - resolved "https://registry.yarnpkg.com/magic-string/-/magic-string-0.30.17.tgz#450a449673d2460e5bbcfba9a61916a1714c7453" - integrity sha512-sNPKHvyjVf7gyjwS4xGTaW/mCnF8wnjtifKBEhxfZ7E/S8tQ0rssrwGNn6q8JH/ohItJfSQp9mBtQYuTlH5QnA== - dependencies: - "@jridgewell/sourcemap-codec" "^1.5.0" - -minimatch@^9.0.3: - version "9.0.5" - resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-9.0.5.tgz#d74f9dd6b57d83d8e98cfb82133b03978bc929e5" - integrity sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow== - dependencies: - brace-expansion "^2.0.1" - -minimatch@~3.0.3: - version "3.0.8" - resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.0.8.tgz#5e6a59bd11e2ab0de1cfb843eb2d82e546c321c1" - integrity sha512-6FsRAQsxQ61mw+qP1ZzbL9Bc78x2p5OqNgNpnoAFLTrX8n5Kxph0CsnhmKKNXTWjXqU5L0pGPR7hYk+XWZr60Q== - dependencies: - brace-expansion "^1.1.7" - -mlly@^1.7.4: - version "1.7.4" - resolved "https://registry.yarnpkg.com/mlly/-/mlly-1.7.4.tgz#3d7295ea2358ec7a271eaa5d000a0f84febe100f" - integrity sha512-qmdSIPC4bDJXgZTCR7XosJiNKySV7O215tsPtDN9iEO/7q/76b/ijtgRu/+epFXSJhijtTCCGp3DWS549P3xKw== - dependencies: - acorn "^8.14.0" - pathe "^2.0.1" - pkg-types "^1.3.0" - ufo "^1.5.4" - -ms@^2.1.3: - version "2.1.3" - resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.3.tgz#574c8138ce1d2b5861f0b44579dbadd60c6615b2" - integrity 
sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA== - -muggle-string@^0.4.1: - version "0.4.1" - resolved "https://registry.yarnpkg.com/muggle-string/-/muggle-string-0.4.1.tgz#3b366bd43b32f809dc20659534dd30e7c8a0d328" - integrity sha512-VNTrAak/KhO2i8dqqnqnAHOa3cYBwXEZe9h+D5h/1ZqFSTEFHdM65lR7RoIqq3tBBYavsOXV84NoHXZ0AkPyqQ== - -nanoid@^3.3.8: - version "3.3.8" - resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.3.8.tgz#b1be3030bee36aaff18bacb375e5cce521684baf" - integrity sha512-WNLf5Sd8oZxOm+TzppcYk8gVOgP+l58xNy58D0nbUnOxOWRWvlcCV4kUF7ltmI6PsrLl/BgKEyS4mqsGChFN0w== - -path-browserify@^1.0.1: - version "1.0.1" - resolved "https://registry.yarnpkg.com/path-browserify/-/path-browserify-1.0.1.tgz#d98454a9c3753d5790860f16f68867b9e46be1fd" - integrity sha512-b7uo2UCUOYZcnF/3ID0lulOJi/bafxa1xPe7ZPsammBSpjSWQkjNxlt635YGS2MiR9GjvuXCtz2emr3jbsz98g== - -path-parse@^1.0.7: - version "1.0.7" - resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.7.tgz#fbc114b60ca42b30d9daf5858e4bd68bbedb6735" - integrity sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw== - -pathe@^2.0.1, pathe@^2.0.3: - version "2.0.3" - resolved "https://registry.yarnpkg.com/pathe/-/pathe-2.0.3.tgz#3ecbec55421685b70a9da872b2cff3e1cbed1716" - integrity sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w== - -picocolors@^1.0.0, picocolors@^1.1.1: - version "1.1.1" - resolved "https://registry.yarnpkg.com/picocolors/-/picocolors-1.1.1.tgz#3d321af3eab939b083c8f929a1d12cda81c26b6b" - integrity sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA== - -picomatch@^4.0.2: - version "4.0.2" - resolved "https://registry.yarnpkg.com/picomatch/-/picomatch-4.0.2.tgz#77c742931e8f3b8820946c76cd0c1f13730d1dab" - integrity sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg== - -pkg-types@^1.3.0: 
- version "1.3.1" - resolved "https://registry.yarnpkg.com/pkg-types/-/pkg-types-1.3.1.tgz#bd7cc70881192777eef5326c19deb46e890917df" - integrity sha512-/Jm5M4RvtBFVkKWRu2BLUTNP8/M2a+UwuAX+ae4770q1qVGtfjG+WTCupoZixokjmHiry8uI+dlY8KXYV5HVVQ== - dependencies: - confbox "^0.1.8" - mlly "^1.7.4" - pathe "^2.0.1" - -pkg-types@^2.0.1: - version "2.1.0" - resolved "https://registry.yarnpkg.com/pkg-types/-/pkg-types-2.1.0.tgz#70c9e1b9c74b63fdde749876ee0aa007ea9edead" - integrity sha512-wmJwA+8ihJixSoHKxZJRBQG1oY8Yr9pGLzRmSsNms0iNWyHHAlZCa7mmKiFR10YPZuz/2k169JiS/inOjBCZ2A== - dependencies: - confbox "^0.2.1" - exsolve "^1.0.1" - pathe "^2.0.3" - -postcss@^8.5.3: - version "8.5.3" - resolved "https://registry.yarnpkg.com/postcss/-/postcss-8.5.3.tgz#1463b6f1c7fb16fe258736cba29a2de35237eafb" - integrity sha512-dle9A3yYxlBSrt8Fu+IpjGT8SY8hN0mlaA6GY8t0P5PjIOZemULz/E2Bnm/2dcUOena75OTNkHI76uZBNUUq3A== - dependencies: - nanoid "^3.3.8" - picocolors "^1.1.1" - source-map-js "^1.2.1" - -punycode@^2.1.0: - version "2.3.1" - resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.3.1.tgz#027422e2faec0b25e1549c3e1bd8309b9133b6e5" - integrity sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg== - -pyodide@0.26.4: - version "0.26.4" - resolved "https://registry.yarnpkg.com/pyodide/-/pyodide-0.26.4.tgz#e1420722d86d78cb35a4734404eee5b070ca7189" - integrity sha512-z2CHsjVlhhJi5tYBF0AYAfNEPo3zq/z+xOpFtk1tweJkRaTqU4UK/7pLvo8DBU2VDPH31vB3pSI+8fnoqrVrFg== - dependencies: - ws "^8.5.0" - -quansync@^0.2.8: - version "0.2.8" - resolved "https://registry.yarnpkg.com/quansync/-/quansync-0.2.8.tgz#2e893d17bb754ba0988ea399ff0bc5f2a8467793" - integrity sha512-4+saucphJMazjt7iOM27mbFCk+D9dd/zmgMDCzRZ8MEoBfYp7lAvoN38et/phRQF6wOPMy/OROBGgoWeSKyluA== - -require-from-string@^2.0.2: - version "2.0.2" - resolved "https://registry.yarnpkg.com/require-from-string/-/require-from-string-2.0.2.tgz#89a7fdd938261267318eafe14f9c32e598c36909" - integrity 
sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw== - -resolve@~1.22.1, resolve@~1.22.2: - version "1.22.10" - resolved "https://registry.yarnpkg.com/resolve/-/resolve-1.22.10.tgz#b663e83ffb09bbf2386944736baae803029b8b39" - integrity sha512-NPRy+/ncIMeDlTAsuqwKIiferiawhefFJtkNSW0qZJEqMEb+qBt/77B/jGeeek+F0uOeN05CDa6HXbbIgtVX4w== - dependencies: - is-core-module "^2.16.0" - path-parse "^1.0.7" - supports-preserve-symlinks-flag "^1.0.0" - -rollup-plugin-dts@^6.1.1: - version "6.1.1" - resolved "https://registry.yarnpkg.com/rollup-plugin-dts/-/rollup-plugin-dts-6.1.1.tgz#46b33f4d1d7f4e66f1171ced9b282ac11a15a254" - integrity sha512-aSHRcJ6KG2IHIioYlvAOcEq6U99sVtqDDKVhnwt70rW6tsz3tv5OSjEiWcgzfsHdLyGXZ/3b/7b/+Za3Y6r1XA== - dependencies: - magic-string "^0.30.10" - optionalDependencies: - "@babel/code-frame" "^7.24.2" - -rollup@^4.30.1: - version "4.34.9" - resolved "https://registry.yarnpkg.com/rollup/-/rollup-4.34.9.tgz#e1eb397856476778aeb6ac2ac3d09b2ce177a558" - integrity sha512-nF5XYqWWp9hx/LrpC8sZvvvmq0TeTjQgaZHYmAgwysT9nh8sWnZhBnM8ZyVbbJFIQBLwHDNoMqsBZBbUo4U8sQ== - dependencies: - "@types/estree" "1.0.6" - optionalDependencies: - "@rollup/rollup-android-arm-eabi" "4.34.9" - "@rollup/rollup-android-arm64" "4.34.9" - "@rollup/rollup-darwin-arm64" "4.34.9" - "@rollup/rollup-darwin-x64" "4.34.9" - "@rollup/rollup-freebsd-arm64" "4.34.9" - "@rollup/rollup-freebsd-x64" "4.34.9" - "@rollup/rollup-linux-arm-gnueabihf" "4.34.9" - "@rollup/rollup-linux-arm-musleabihf" "4.34.9" - "@rollup/rollup-linux-arm64-gnu" "4.34.9" - "@rollup/rollup-linux-arm64-musl" "4.34.9" - "@rollup/rollup-linux-loongarch64-gnu" "4.34.9" - "@rollup/rollup-linux-powerpc64le-gnu" "4.34.9" - "@rollup/rollup-linux-riscv64-gnu" "4.34.9" - "@rollup/rollup-linux-s390x-gnu" "4.34.9" - "@rollup/rollup-linux-x64-gnu" "4.34.9" - "@rollup/rollup-linux-x64-musl" "4.34.9" - "@rollup/rollup-win32-arm64-msvc" "4.34.9" - "@rollup/rollup-win32-ia32-msvc" "4.34.9" - 
"@rollup/rollup-win32-x64-msvc" "4.34.9" - fsevents "~2.3.2" - -semver@~7.5.4: - version "7.5.4" - resolved "https://registry.yarnpkg.com/semver/-/semver-7.5.4.tgz#483986ec4ed38e1c6c48c34894a9182dbff68a6e" - integrity sha512-1bCSESV6Pv+i21Hvpxp3Dx+pSD8lIPt8uVjRrxAUt/nbswYc+tK6Y2btiULjd4+fnq15PX+nqQDC7Oft7WkwcA== - dependencies: - lru-cache "^6.0.0" - -source-map-js@^1.2.0, source-map-js@^1.2.1: - version "1.2.1" - resolved "https://registry.yarnpkg.com/source-map-js/-/source-map-js-1.2.1.tgz#1ce5650fddd87abc099eda37dcff024c2667ae46" - integrity sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA== - -source-map@~0.6.1: - version "0.6.1" - resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.6.1.tgz#74722af32e9614e9c287a8d0bbde48b5e2f1a263" - integrity sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g== - -sprintf-js@~1.0.2: - version "1.0.3" - resolved "https://registry.yarnpkg.com/sprintf-js/-/sprintf-js-1.0.3.tgz#04e6926f662895354f3dd015203633b857297e2c" - integrity sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g== - -string-argv@~0.3.1: - version "0.3.2" - resolved "https://registry.yarnpkg.com/string-argv/-/string-argv-0.3.2.tgz#2b6d0ef24b656274d957d54e0a4bbf6153dc02b6" - integrity sha512-aqD2Q0144Z+/RqG52NeHEkZauTAUWJO8c6yTftGJKO3Tja5tUgIfmIl6kExvhtxSDP7fXB6DvzkfMpCd/F3G+Q== - -strip-json-comments@~3.1.1: - version "3.1.1" - resolved "https://registry.yarnpkg.com/strip-json-comments/-/strip-json-comments-3.1.1.tgz#31f1281b3832630434831c310c01cccda8cbe006" - integrity sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig== - -supports-color@~8.1.1: - version "8.1.1" - resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-8.1.1.tgz#cd6fc17e28500cff56c1b86c0a7fd4a54a73005c" - integrity 
sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q== - dependencies: - has-flag "^4.0.0" - -supports-preserve-symlinks-flag@^1.0.0: - version "1.0.0" - resolved "https://registry.yarnpkg.com/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz#6eda4bd344a3c94aea376d4cc31bc77311039e09" - integrity sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w== - -typescript@5.7.3: - version "5.7.3" - resolved "https://registry.yarnpkg.com/typescript/-/typescript-5.7.3.tgz#919b44a7dbb8583a9b856d162be24a54bf80073e" - integrity sha512-84MVSjMEHP+FQRPy3pX9sTVV/INIex71s9TL2Gm5FG/WG1SqXeKyZ0k7/blY/4FdOzI12CBy1vGc4og/eus0fw== - -typescript@^5.8.2: - version "5.8.2" - resolved "https://registry.yarnpkg.com/typescript/-/typescript-5.8.2.tgz#8170b3702f74b79db2e5a96207c15e65807999e4" - integrity sha512-aJn6wq13/afZp/jT9QZmwEjDqqvSGp1VT5GVg+f/t6/oVyrgXM6BY1h9BRh/O5p3PlUPAe+WuiEZOmb/49RqoQ== - -ufo@^1.5.4: - version "1.5.4" - resolved "https://registry.yarnpkg.com/ufo/-/ufo-1.5.4.tgz#16d6949674ca0c9e0fbbae1fa20a71d7b1ded754" - integrity sha512-UsUk3byDzKd04EyoZ7U4DOlxQaD14JUKQl6/P7wiX4FNvUfm3XL246n9W5AmqwW5RSFJ27NAuM0iLscAOYUiGQ== - -universalify@^2.0.0: - version "2.0.1" - resolved "https://registry.yarnpkg.com/universalify/-/universalify-2.0.1.tgz#168efc2180964e6386d061e094df61afe239b18d" - integrity sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw== - -uri-js@^4.2.2, uri-js@^4.4.1: - version "4.4.1" - resolved "https://registry.yarnpkg.com/uri-js/-/uri-js-4.4.1.tgz#9b1a52595225859e55f669d928f88c6c57f2a77e" - integrity sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg== - dependencies: - punycode "^2.1.0" - -vite-plugin-dts@^4.5.3: - version "4.5.3" - resolved "https://registry.yarnpkg.com/vite-plugin-dts/-/vite-plugin-dts-4.5.3.tgz#9d9e64c12e21e9389efd9f999e034cd5864b0027" - integrity 
sha512-P64VnD00dR+e8S26ESoFELqc17+w7pKkwlBpgXteOljFyT0zDwD8hH4zXp49M/kciy//7ZbVXIwQCekBJjfWzA== - dependencies: - "@microsoft/api-extractor" "^7.50.1" - "@rollup/pluginutils" "^5.1.4" - "@volar/typescript" "^2.4.11" - "@vue/language-core" "2.2.0" - compare-versions "^6.1.1" - debug "^4.4.0" - kolorist "^1.8.0" - local-pkg "^1.0.0" - magic-string "^0.30.17" - -vite@^6.2.0: - version "6.2.0" - resolved "https://registry.yarnpkg.com/vite/-/vite-6.2.0.tgz#9dcb543380dab18d8384eb840a76bf30d78633f0" - integrity sha512-7dPxoo+WsT/64rDcwoOjk76XHj+TqNTIvHKcuMQ1k4/SeHDaQt5GFAeLYzrimZrMpn/O6DtdI03WUjdxuPM0oQ== - dependencies: - esbuild "^0.25.0" - postcss "^8.5.3" - rollup "^4.30.1" - optionalDependencies: - fsevents "~2.3.3" - -vscode-uri@^3.0.8: - version "3.1.0" - resolved "https://registry.yarnpkg.com/vscode-uri/-/vscode-uri-3.1.0.tgz#dd09ec5a66a38b5c3fffc774015713496d14e09c" - integrity sha512-/BpdSx+yCQGnCvecbyXdxHDkuk55/G3xwnC0GqY4gmQ3j+A+g8kzzgB4Nk/SINjqn6+waqw3EgbVF2QKExkRxQ== - -ws@^8.5.0: - version "8.18.1" - resolved "https://registry.yarnpkg.com/ws/-/ws-8.18.1.tgz#ea131d3784e1dfdff91adb0a4a116b127515e3cb" - integrity sha512-RKW2aJZMXeMxVpnZ6bck+RswznaxmzdULiBr6KY7XkTnW8uvt0iT9H5DkHUChXrc+uurzwa0rVI16n/Xzjdz1w== - -yallist@^4.0.0: - version "4.0.0" - resolved "https://registry.yarnpkg.com/yallist/-/yallist-4.0.0.tgz#9bb92790d9c0effec63be73519e11a35019a3a72" - integrity sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A== diff --git a/readme.md b/readme.md index 4453413..56d762c 100644 --- a/readme.md +++ b/readme.md @@ -112,8 +112,8 @@ The Peptonizer2000 workflow is comprised of the following steps: ### Prerequisites -## Python counterpart -The actual code that builds the factor graph and executes the Peptonizer algorithm, is implemented in Python and can be found in the `peptonizer` folder. +## Rust implementation +The core algorithm is implemented in Rust in the `peptonizer_rust` folder. 
Wheels are created for all major platforms so users can use the package on supported systems. ### Running as snakemake workflow In order to run the Peptonizer2000 on your own system, you should install Conda, Mamba and all of its dependencies. @@ -126,6 +126,71 @@ Follow the installation instructions step-by-step for an explanation of what you * Run `conda activate peptonizer` to switch the current Conda environment to the peptonizer environment you created earlier. * Start the peptonizer with the command `snakemake --use-conda --cores 1`. If you have sufficient CPU and memory power available to your system, you can increase the amount of cores in order to speed up the workflow. +If you see the following error while installing dependencies: + +``` +ERROR: Could not find a version that satisfies the requirement peptonizer_rust (from versions: none) +ERROR: No matching distribution found for peptonizer_rust +``` + +then the workflow could not find a wheel for `peptonizer_rust` for your platform. To create one manually, expand the instructions below. + +<details>
+ <summary>Show manual wheel creation instructions</summary> + + - Change to the Rust package directory: + + ```bash + cd peptonizer_rust + ``` + + - Download the rustup installer: + + ```bash + curl -sSf -o rustup-init.sh https://sh.rustup.rs + ``` + + - Run the installer with defaults: + + ```bash + sh rustup-init.sh -y + ``` + + - Make `cargo` available in this shell session: + + ```bash + source "$HOME/.cargo/env" + ``` + + - Update the Rust toolchain to stable: + + ```bash + rustup update + ``` + + - Install Linux build tools (Debian/Ubuntu example): + + ```bash + sudo apt-get update + sudo apt-get install -y build-essential + ``` + + - Install `maturin` into the active Conda environment (run after `conda activate peptonizer`): + + ```bash + python -m pip install --upgrade pip setuptools wheel + python -m pip install maturin + ``` + + - Build the wheel for Python 3.12 (adjust `-i` if using a different Python): + + ```bash + maturin build --release --out dist -i python3.12 + ``` + + The command writes one or more `.whl` files to `peptonizer_rust/dist`. + +</details>
### Configuration file diff --git a/snakemake/config/config.yaml b/snakemake/config/config.yaml index ba34be2..7e30ecf 100644 --- a/snakemake/config/config.yaml +++ b/snakemake/config/config.yaml @@ -5,7 +5,7 @@ input_file: '../resources/test_files/S03.tsv' # In which directory should all output (and intermediate) files be stored. This directory is relative to the location of # the Snakefile. -data_dir: '../data' +data_dir: '../data/' # In which directory should all logs for each of the different Snakemake rules be stored? log_dir: '../logs' diff --git a/snakemake/workflow/Snakefile b/snakemake/workflow/Snakefile index 1420caa..eb2b727 100644 --- a/snakemake/workflow/Snakefile +++ b/snakemake/workflow/Snakefile @@ -19,18 +19,17 @@ project_root = Path(path.join(Path(workflow.basedir).parent.parent)).as_posix() rule All: input: path.join(data_dir, 'best_parameters.csv'), - path.join(data_dir, 'peptonizer_result.csv'), + path.join(data_dir, 'peptonizer_result.json'), path.join(data_dir, 'peptonizer_result.png') + rule GetTaxonomyFromUnipept: input: config['input_file'] output: path.join(data_dir, 'peptide_taxa.json') - log: - path.join(log_dir, 'get_taxonomy_from_unipept.log') params: - taxon_query=config['taxon_query'], + taxon_query=str(config['taxon_query']), taxon_rank=config['taxon_rank'], run_command=select_command(config['profile'], path.join(benchmark_dir, "get_taxonomy_from_unipept.prof")) benchmark: @@ -39,7 +38,7 @@ rule GetTaxonomyFromUnipept: shell: """ export PYTHONPATH={project_root}:${{PYTHONPATH:-}} - {params.run_command} scripts/pout_to_taxonomy.py --input-file {input[0]} --unipept-response-file {output[0]} --log-file {log[0]} --taxonomy-query {params.taxon_query} --taxon-rank {params.taxon_rank} + {params.run_command} scripts/pout_to_taxonomy.py --input-file {input[0]} --unipept-response-file {output[0]} --taxonomy-query {params.taxon_query} --taxon-rank {params.taxon_rank} """ rule WeightTaxa: @@ -66,7 +65,7 @@ rule CreatePepGMGraph: input: 
path.join(data_dir, 'sequence_scores_dataframe.csv') output: - path.join(data_dir, 'pepgm_full_graph.graphml') + path.join(data_dir, 'pepgm_full_graph_bytes.bin') params: run_command=select_command(config['profile'], path.join(benchmark_dir, "create_pepgm_graph.prof")) benchmark: @@ -80,9 +79,9 @@ rule CreatePepGMGraph: rule RunPepGM: input: - path.join(data_dir, 'pepgm_full_graph.graphml') + path.join(data_dir, 'pepgm_full_graph_bytes.bin') output: - path.join(data_dir, 'prior{prior}', 'pepgm_results_a{alpha}_b{beta}_p{prior}.csv') + path.join(data_dir, 'prior{prior}', 'pepgm_results_a{alpha}_b{beta}_p{prior}.json') params: regularized=config['regularized'], run_command=select_command(config['profile'], path.join(benchmark_dir, 'prior{prior}', "run_pepgm_graph_a{alpha}_b{beta}_p{prior}.prof")) @@ -92,12 +91,12 @@ rule RunPepGM: shell: """ export PYTHONPATH={project_root}:${{PYTHONPATH:-}} - {params.run_command} scripts/pepgm.py --communities-graphml-path {input[0]} --out {output[0]} --prior {wildcards.prior} --alpha {wildcards.alpha} --beta {wildcards.beta} --regularized {params.regularized} + {params.run_command} scripts/pepgm.py --communities-graph-bytes-path {input[0]} --out {output[0]} --prior {wildcards.prior} --alpha {wildcards.alpha} --beta {wildcards.beta} --regularized {params.regularized} """ rule GenerateBarPlot: input: - path.join(data_dir, 'prior{prior}', 'pepgm_results_a{alpha}_b{beta}_p{prior}.csv') + path.join(data_dir, 'prior{prior}', 'pepgm_results_a{alpha}_b{beta}_p{prior}.json') output: path.join(data_dir, 'prior{prior}', 'pepgm_results_a{alpha}_b{beta}_p{prior}.png') params: @@ -112,7 +111,7 @@ rule GenerateBarPlot: rule PerformTaxaClustering: input: - path.join(data_dir, 'pepgm_full_graph.graphml'), + path.join(data_dir, 'sequence_scores_dataframe.csv'), path.join(data_dir, 'taxa_weights_dataframe.csv') output: path.join(data_dir, 'clustered_taxa_weights_dataframe.csv') @@ -125,7 +124,7 @@ rule PerformTaxaClustering: shell: """ export 
PYTHONPATH={project_root}:${{PYTHONPATH:-}} - {params.run_command} scripts/cluster_taxa.py --full-graphml-path {input[0]} --taxa-weights-dataframe-file {input[1]} --similarity-threshold {params.similarity_threshold} --out {output[0]} + {params.run_command} scripts/cluster_taxa.py --sequence-scores-dataframe-file {input[0]} --taxa-weights-dataframe-file {input[1]} --similarity-threshold {params.similarity_threshold} --out {output[0]} """ rule FindBestParameters: @@ -134,7 +133,7 @@ rule FindBestParameters: expand(path.join(data_dir, 'prior{prior}', 'pepgm_results_a{alpha}_b{beta}_p{prior}.png'), alpha=config['alpha'], beta=config['beta'], prior=config['prior']) output: path.join(data_dir, 'best_parameters.csv'), - path.join(data_dir, 'peptonizer_result.csv'), + path.join(data_dir, 'peptonizer_result.json'), path.join(data_dir, 'peptonizer_result.png') params: results_dir=data_dir, @@ -145,5 +144,5 @@ rule FindBestParameters: shell: """ export PYTHONPATH={project_root}:${{PYTHONPATH:-}} - {params.run_command} scripts/find_best_parameters.py --taxa-weights-dataframe-file {input[0]} --results-folder {params.results_dir} --best-params-file {output[0]} --best-params-csv {output[1]} --best-params-png {output[2]} + {params.run_command} scripts/find_best_parameters.py --taxa-weights-dataframe-file {input[0]} --results-folder {params.results_dir} --best-params-file {output[0]} --best-params-json {output[1]} --best-params-png {output[2]} """ diff --git a/snakemake/workflow/env.yml b/snakemake/workflow/env.yml index 06d0776..a1327c8 100644 --- a/snakemake/workflow/env.yml +++ b/snakemake/workflow/env.yml @@ -1,23 +1,16 @@ name: peptonizer channels: + - conda-forge - bioconda - defaults - - conda-forge dependencies: - - requests=2.25.1 - - ete3=3.1.2 - - aiofiles=23.2.1 + - python=3.12.* - numpy=1.26.4 - pandas=2.2.2 - - pandas-stubs - - scikit-learn=1.4.2 - - aiohttp=3.9.5 - - networkx=3.2 - - types-networkx - - scipy=1.13.1 - matplotlib=3.8.4 - - numba - - pyright - pip - 
pip: - - rbo \ No newline at end of file + - rbo + - --find-links=../../peptonizer_rust/dist/ + - --find-links=../../../../peptonizer_rust/dist/ + - peptonizer_rust \ No newline at end of file diff --git a/snakemake/workflow/scripts/bar_plot_results.py b/snakemake/workflow/scripts/bar_plot_results.py index 9678244..b4f50c0 100644 --- a/snakemake/workflow/scripts/bar_plot_results.py +++ b/snakemake/workflow/scripts/bar_plot_results.py @@ -1,19 +1,81 @@ +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt import matplotlib +import json import argparse -from peptonizer.peptonizer import plot_peptonizer_results +from peptonizer_rust import get_names_for_taxa_py """ -Script that takes PepGM .csv output, translates taxIDS to scientific names, and barplots the *number of results* highest +Script that takes PepGM .json output, translates taxIDS to scientific names, and barplots the *number of results* highest scoring taxa. """ +def plot_peptonizer_results(input_file: str, output_file: str, number_of_taxa: int = 25): + """ + Read the results of a Peptonizer run from a JSON-file (denoted by the input_file argument) and write bar charts + representing these results to a PNG-file. + """ + assert input_file.lower().endswith(".json"), "Input file should be a JSON file." + assert output_file.lower().endswith(".png"), "Output file should be a PNG file." 
+ + # Read JSON file + with open(input_file, "r") as f: + data = json.load(f) + taxon_scores: Dict[int, float] = { + int(k): float(v) + for k, v in data.items() + } + + # Get top N taxa by score + top_taxa = sorted( + taxon_scores.items(), + key=lambda x: x[1], + reverse=True + )[:number_of_taxa] + taxon_ids = [taxon_id for taxon_id, _ in top_taxa] + taxon_scores = [score for _, score in top_taxa] + taxon_names_dict = json.loads(get_names_for_taxa_py(taxon_ids)) + taxon_names_dict: Dict[int, str] = { + int(k): str(v) + for k, v in taxon_names_dict.items() + } + taxon_names = [taxon_names_dict[taxon_id] for taxon_id in taxon_ids] + + # make the barplot + fig, ax = plt.subplots() + fig.set_size_inches(30, 15) + bars = ax.barh( + range(len(taxon_names)), + taxon_scores, + color="#283593", + ) + + ax.set_yticks(range(len(taxon_names))) + ax.set_yticklabels(taxon_names, fontsize=24, color="#283593", fontweight="bold") + ax.tick_params(axis='y', which='major', pad=15) + plt.xlim((0, 1)) + plt.xlabel("Probability score", fontsize=35, fontweight="bold") + ax.xaxis.set_ticks(np.arange(0, 1.2, 0.2)) + ax.xaxis.set_ticklabels([0, 0.2, 0.4, 0.6, 0.8, 1.0], fontsize=35) + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + ax.spines["bottom"].set_visible(False) + ax.spines["left"].set_visible(False) + ax.bar_label(bars, fmt='{:,.3f}', fontsize=24, fontweight='bold', color='black', padding=20) + + fig.tight_layout() + + plt.savefig(output_file) + plt.close() + matplotlib.use("Agg") parser = argparse.ArgumentParser(description="Generate BarPlot of PepGM results.") parser.add_argument( - "--results-file", type=str, help="Path(s) to your PepGM results CSV." + "--results-file", type=str, help="Path(s) to your PepGM results JSON." 
) parser.add_argument( "--number-of-results", @@ -28,4 +90,4 @@ ) args = parser.parse_args() -plot_peptonizer_results(args.results_file, args.results_file.replace(".csv", ".png"), args.number_of_results) +plot_peptonizer_results(args.results_file, args.results_file.replace(".json", ".png"), args.number_of_results) diff --git a/snakemake/workflow/scripts/cluster_taxa.py b/snakemake/workflow/scripts/cluster_taxa.py index 68ed437..dc105a5 100644 --- a/snakemake/workflow/scripts/cluster_taxa.py +++ b/snakemake/workflow/scripts/cluster_taxa.py @@ -1,15 +1,13 @@ import argparse -import networkx as nx -import pandas as pd -from peptonizer.peptonizer import cluster_taxa_based_on_similarity +from peptonizer_rust import cluster_taxa_py parser = argparse.ArgumentParser(description = 'cluster Taxa based on peptidome similarity and weight attributed') parser.add_argument( - '--full-graphml-path', + '--sequence-scores-dataframe-file', type = str, - help = 'Path(s) to the full Peptonizer graphml file for which you wish to cluster taxa (not containing communities).' + help = 'Path to the sequence scores dataframe file.' 
) parser.add_argument( '--taxa-weights-dataframe-file', @@ -29,10 +27,15 @@ args = parser.parse_args() -clustered_taxa_df = cluster_taxa_based_on_similarity( - nx.read_graphml(args.full_graphml_path), - pd.read_csv(args.taxa_weights_dataframe_file), +with open(args.sequence_scores_dataframe_file, 'r') as sequence_scores_file, open(args.taxa_weights_dataframe_file, 'r') as taxa_weights_file: + sequence_scores_csv = sequence_scores_file.read() + taxa_weights_csv = taxa_weights_file.read() + +clustered_taxa_csv = cluster_taxa_py( + sequence_scores_csv, + taxa_weights_csv, args.similarity_threshold ) -clustered_taxa_df.to_csv(args.out) +with open(args.out, 'w') as clustered_taxa_file: + clustered_taxa_file.write(clustered_taxa_csv) diff --git a/snakemake/workflow/scripts/create_pepgm_graph.py b/snakemake/workflow/scripts/create_pepgm_graph.py index e9f7475..647066e 100644 --- a/snakemake/workflow/scripts/create_pepgm_graph.py +++ b/snakemake/workflow/scripts/create_pepgm_graph.py @@ -1,7 +1,6 @@ import argparse -import pandas as pd -from peptonizer.peptonizer import generate_pepgm_graph +from peptonizer_rust import generate_pepgm_graph_py parser = argparse.ArgumentParser( @@ -18,10 +17,15 @@ "--out", type=str, required=True, - help="Path to output file where GraphML will be saved.", + help="Path to output file where graph bytes will be saved.", ) args = parser.parse_args() -ct_factor_graph = generate_pepgm_graph(pd.read_csv(args.sequence_scores_dataframe_file, dtype={"HigherTaxa": "Int64"})) -ct_factor_graph.save_to_graph_ml(args.out) +with open(args.sequence_scores_dataframe_file, "r") as f: + csv_str = f.read() + +ct_factor_graph_bytes = generate_pepgm_graph_py(csv_str) + +with open(args.out, 'wb') as graphml_file: + graphml_file.write(ct_factor_graph_bytes) diff --git a/snakemake/workflow/scripts/find_best_parameters.py b/snakemake/workflow/scripts/find_best_parameters.py index 39d2d53..454e6af 100644 --- a/snakemake/workflow/scripts/find_best_parameters.py +++ 
b/snakemake/workflow/scripts/find_best_parameters.py @@ -5,7 +5,7 @@ import shutil from os import path -from peptonizer.peptonizer import find_best_parameters, ParameterSet, clean_csv, parse_taxon_scores +from peptonizer_rust import compute_goodness_py, clean_csv_py parser = argparse.ArgumentParser() @@ -20,7 +20,7 @@ "--results-folder", type=str, required=True, - help="Path to a folder containing CSV-files with all the results from a prior PepGM analysis.", + help="Path to a folder containing JSON-files with all the results from a prior PepGM analysis.", ) parser.add_argument( "--best-params-file", @@ -29,10 +29,10 @@ help="Path to the output file where the best suited parameter set should be stored in." ) parser.add_argument( - "--best-params-csv", + "--best-params-json", type=str, required=True, - help="Path to the output file where the results of the best Peptonizer run in CSV format should be stored." + help="Path to the output file where the results of the best Peptonizer run in JSON format should be stored." 
) parser.add_argument( "--best-params-png", @@ -43,16 +43,16 @@ args = parser.parse_args() -def find_csv_files(folder_path): - csv_files = [] +def find_json_files(folder_path): + json_files = [] # Walk through the directory and subdirectories for root, dirs, files in os.walk(folder_path): for file in files: - if file.endswith('.csv') and file.find("pepgm_results") >= 0: - csv_files.append(os.path.join(root, file)) + if file.endswith('.json') and file.find("pepgm_results") >= 0: + json_files.append(os.path.join(root, file)) - return csv_files + return json_files def extract_parameters(filename): # Regular expression to find the patterns 'aX', 'bX', 'pX' where X is a float @@ -69,38 +69,39 @@ def extract_parameters(filename): raise ValueError("The filename does not contain valid 'a', 'b', and 'p' parameters.") # Get all the taxa weights that are required to compute the goodness metric for each results file -weights_df = pd.read_csv( - args.taxa_weights_dataframe_file, - usecols = ['HigherTaxa', 'scaled_weight'] -) +taxide_weights = "" +with open(args.taxa_weights_dataframe_file, 'r') as taxid_weights_file: + taxid_weights = taxid_weights_file.read() # Store all result dataframes and the corresponding parameter sets in this list that will be used to finally find the # best parameter set. 
-results_and_params = [] -for result_file in find_csv_files(args.results_folder): +best_param_set = (0, 0, 0) +best_goodness = 0.0 +for result_file in find_json_files(args.results_folder): alpha, beta, prior = extract_parameters(result_file) with open(result_file, "r") as f: - taxon_scores = parse_taxon_scores(f.read()) - parameter_set = ParameterSet(alpha, beta, prior) - results_and_params.append((taxon_scores, parameter_set)) - -best_param_set = find_best_parameters(results_and_params, weights_df) + peptonizer_result = f.read() + goodness = compute_goodness_py(taxid_weights, peptonizer_result) + if goodness > best_goodness: + best_goodness = goodness + best_param_set = (alpha, beta, prior) # Write out the best parameters to a CSV file for future reference +(alpha, beta, prior) = best_param_set with open(args.best_params_file, "w") as f: f.write("alpha,beta,prior\n") - f.write(f"{best_param_set.alpha},{best_param_set.beta},{best_param_set.prior}\n") + f.write(f"{alpha},{beta},{prior}\n") # Clean the CSV for the best parameters and write it to the final output directory -best_csv_path = path.join(args.results_folder, f"prior{best_param_set.prior}", f"pepgm_results_a{best_param_set.alpha}_b{best_param_set.beta}_p{best_param_set.prior}.csv") -with open(best_csv_path, "r") as in_file: - clean_taxa_csv = clean_csv(in_file.read()) +best_json_path = path.join(args.results_folder, f"prior{prior}", f"pepgm_results_a{alpha}_b{beta}_p{prior}.json") +with open(best_json_path, "r") as in_file: + clean_taxa_json = clean_csv_py(in_file.read()) - with open(args.best_params_csv, "w") as out_file: - out_file.write(clean_taxa_csv) + with open(args.best_params_json, "w") as out_file: + out_file.write(clean_taxa_json) # Copy the plots with the best parameters to the final output directory shutil.copy( - path.join(args.results_folder, f"prior{best_param_set.prior}", f"pepgm_results_a{best_param_set.alpha}_b{best_param_set.beta}_p{best_param_set.prior}.png"), + 
path.join(args.results_folder, f"prior{prior}", f"pepgm_results_a{alpha}_b{beta}_p{prior}.png"), args.best_params_png ) diff --git a/snakemake/workflow/scripts/infer_taxa_weights.py b/snakemake/workflow/scripts/infer_taxa_weights.py index 7f2fd5e..5a7bdcd 100644 --- a/snakemake/workflow/scripts/infer_taxa_weights.py +++ b/snakemake/workflow/scripts/infer_taxa_weights.py @@ -1,8 +1,7 @@ import argparse import gzip -import json -from peptonizer.peptonizer import perform_taxa_weighing, parse_peptide_tsv, UnipeptCommunicator +from peptonizer_rust import perform_taxa_weighing_py, parse_input_peptides_py parser = argparse.ArgumentParser() @@ -49,22 +48,23 @@ file_contents = file.read() # Parse the input MS2Rescore file -pep_score, pep_psm_counts = parse_peptide_tsv(file_contents) +pep_score, pep_psm_counts = parse_input_peptides_py(file_contents) -# Parse the Unipept response file +# Read the Unipept response file +unipept_responses = "" with open(args.unipept_response_file, "r") as file: - unipept_responses = json.load(file) + unipept_responses = file.read() -df, weights = perform_taxa_weighing( +sequence_scores, taxa_weights = perform_taxa_weighing_py( unipept_responses, pep_score, pep_psm_counts, args.number_of_taxa, - UnipeptCommunicator(), args.taxon_rank ) print("Started dumping produced results to CSV-files...") -df.to_csv(args.sequence_scores_dataframe_file) -weights.to_csv(args.taxa_weights_dataframe_file) +with open(args.sequence_scores_dataframe_file, 'w') as sequences_file, open(args.taxa_weights_dataframe_file, 'w') as weights_file: + sequences_file.write(sequence_scores) + weights_file.write(taxa_weights) diff --git a/snakemake/workflow/scripts/pepgm.py b/snakemake/workflow/scripts/pepgm.py index 4d1a4cd..327a674 100644 --- a/snakemake/workflow/scripts/pepgm.py +++ b/snakemake/workflow/scripts/pepgm.py @@ -1,6 +1,6 @@ import argparse -from peptonizer.peptonizer import run_belief_propagation +from peptonizer_rust import execute_pepgm_py parser = 
argparse.ArgumentParser( @@ -8,10 +8,10 @@ ) parser.add_argument( - "--communities-graphml-path", + "--communities-graph-bytes-path", type=str, required=True, - help="Path to where the GraphML file of the factor graph (using Louvain communities) is stored.", + help="Path to where the binary file of the factor graph (using Louvain communities) is stored.", ) parser.add_argument( "--max-iter", @@ -54,8 +54,8 @@ args = parser.parse_args() -with open(args.communities_graphml_path, 'r') as in_file: - csv_content = run_belief_propagation( +with open(args.communities_graph_bytes_path, 'rb') as in_file: + json_content = execute_pepgm_py( in_file.read(), args.alpha, args.beta, @@ -66,4 +66,4 @@ ) with open(args.out, 'w') as out_file: - out_file.write(csv_content) + out_file.write(json_content) diff --git a/snakemake/workflow/scripts/pout_to_taxonomy.py b/snakemake/workflow/scripts/pout_to_taxonomy.py index 74a013d..05ec3e5 100644 --- a/snakemake/workflow/scripts/pout_to_taxonomy.py +++ b/snakemake/workflow/scripts/pout_to_taxonomy.py @@ -1,13 +1,14 @@ import argparse import json -from peptonizer.peptonizer import parse_peptide_tsv, fetch_peptides_and_filter_taxa, UnipeptCommunicator +from peptonizer_rust import fetch_unipept_taxa_py, parse_unique_peptides_py parser = argparse.ArgumentParser() parser.add_argument( "--taxonomy-query", + type=str, required=True, help="Taxa that should be used to query in Unipept. If querying all taxa, put [1].", ) @@ -23,12 +24,6 @@ required=True, help="Path to output file that contains all queried peptide counts (which should be used in the next step)." 
) -parser.add_argument( - "--log-file", - type=str, - required=True, - help="Output: path to logfile where failed Unipept query attempts are stored.", -) parser.add_argument( "--taxon-rank", type=str, @@ -43,14 +38,13 @@ file_contents = file.read() # Parse the input TSV file -pep_score, pep_psm_counts = parse_peptide_tsv(file_contents) +peptides = parse_unique_peptides_py(file_contents) -unipept_response = fetch_peptides_and_filter_taxa( - list(pep_score.keys()), - args.taxonomy_query, +unipept_response = fetch_unipept_taxa_py( + peptides, args.taxon_rank, - UnipeptCommunicator() + args.taxonomy_query ) with open(args.unipept_response_file, "w") as f: - f.write(json.dumps(unipept_response)) + f.write(unipept_response)