Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions bioconda-import/bioconda_importer.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
#!/usr/bin/env python

import os
import sys
import yaml
import argparse
from pathlib import Path
import jinja2

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from common.metadata import normalize_version_fields

def clean(content_path):
import_directory = os.path.join(content_path, "imports", "bioconda")
os.makedirs(import_directory, exist_ok=True)
Expand Down Expand Up @@ -56,6 +60,7 @@ def merge(conda, content_path):
biotools_data_path = os.path.join(content_path, 'data')
for name, data in conda.items():
try:
data = normalize_version_fields(data, ["package.version"])
package_name = data['package']['name']
import_file_path = os.path.join(bioconda_import_path, f"bioconda_{package_name}.yaml")
with open(import_file_path, "w") as out:
Expand Down
5 changes: 5 additions & 0 deletions bioconductor-import/import.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,14 @@
import glob
import json
import os
import sys
import requests
import logging
import yaml

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from common.metadata import normalize_version_fields

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger()
Expand Down Expand Up @@ -86,6 +90,7 @@ def retrieve(version, filters=None):
path = os.path.join("imports", "bioconductor", f"{package_name}.bioconductor.json")

try:
pack = normalize_version_fields(pack, ["Version"])
with open(path, "w") as write_file:
json.dump(pack, write_file, sort_keys=True, indent=4, separators=(",", ": "))
logger.info(f"Saved {idx}/{total_packs} - {package_name}")
Expand Down
7 changes: 7 additions & 0 deletions biotools-import/import.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
import requests
from boltons.iterutils import remap

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from common.metadata import normalize_version_fields

BIOTOOLS_DOMAIN = "https://bio.tools"
SSL_VERIFY = True

Expand Down Expand Up @@ -49,6 +52,10 @@ def retrieve(filters=None):
with open(os.path.join(directory, tpe_id + ".biotools.json"), "w") as write_file:
drop_false = lambda path, key, value: bool(value)
tool_cleaned = remap(tool, visit=drop_false)
tool_cleaned = normalize_version_fields(
tool_cleaned, ["version"]
)

json.dump(
tool_cleaned, write_file, sort_keys=True, indent=4, separators=(",", ": ")
)
Expand Down
105 changes: 105 additions & 0 deletions common/metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import logging


def normalize_version_to_string(value):
    """
    Return ``value`` with every numeric leaf rendered as a string.

    Containers are rebuilt rather than mutated: a list yields a new list and
    a dict yields a new dict (keys are kept as-is, only values are visited).
    Anything that is not an int, float, list or dict is handed back untouched.

    Args:
        value: Any object; typically a scalar version or a JSON/YAML-like
            structure containing versions.

    Returns:
        - ``None`` and ``bool`` values: returned unchanged
        - ``int`` and ``float`` values: ``str(value)``
        - ``list``: new list whose elements are normalized recursively
        - ``dict``: new dict whose values are normalized recursively
        - everything else (e.g. ``str``): returned unchanged

    Examples:
        >>> normalize_version_to_string(1)
        '1'
        >>> normalize_version_to_string([1, 2, 3])
        ['1', '2', '3']
        >>> normalize_version_to_string({'version': 1.5})
        {'version': '1.5'}
    """
    # bool is a subclass of int, so it has to be ruled out before the
    # numeric conversion below or True/False would become 'True'/'False'.
    if isinstance(value, bool) or value is None:
        return value

    if isinstance(value, dict):
        return {
            key: normalize_version_to_string(item)
            for key, item in value.items()
        }

    if isinstance(value, list):
        return list(map(normalize_version_to_string, value))

    return str(value) if isinstance(value, (int, float)) else value


def _normalize_path(node, segments):
    """Normalize, in place, the value(s) reached from ``node`` via ``segments``.

    ``segments`` is a non-empty list of path components. A component ending
    in ``[]`` names a list: as the last component the whole list is
    normalized, otherwise the remaining components are applied to each item.
    Missing keys and values of an unexpected type are skipped silently.
    """
    if not isinstance(node, dict):
        return

    segment, rest = segments[0], segments[1:]
    is_list = segment.endswith("[]")
    key = segment[:-2] if is_list else segment
    if key not in node:
        return

    value = node[key]
    if is_list:
        # A "[]" component only applies when the value really is a list.
        if not isinstance(value, list):
            return
        if rest:
            for item in value:
                _normalize_path(item, rest)
        else:
            node[key] = normalize_version_to_string(value)
    elif rest:
        _normalize_path(value, rest)
    else:
        node[key] = normalize_version_to_string(value)


def normalize_version_fields(data, field_paths):
    """
    Normalize version fields to strings in a data dictionary.

    Each path in ``field_paths`` is resolved against ``data`` and the value
    found there is replaced by ``normalize_version_to_string(value)``.
    ``data`` is modified in place and also returned. Paths that do not match
    the structure of ``data`` (missing key, non-dict intermediate, non-list
    where ``[]`` is used) are ignored.

    Args:
        data (dict): The dictionary to process.
        field_paths (str | iterable): A single field path, or an iterable of
            field path strings. Components are separated by ``.``; a
            component ending in ``[]`` refers to a list. Supports:
            - Simple fields: "version"
            - Nested fields: "tool.version"
            - List fields: "versions[]"
            - List item nested fields: "versions[].version"
            - Any combination of the above, e.g.
              "tool.versions[].version" or "versions[].meta.version"

    Returns:
        dict: The modified data dictionary with normalized version fields.

    Raises:
        TypeError: If data is not a dictionary.

    Examples:
        >>> data = {"version": 1, "versions": [{"version": 2}]}
        >>> normalize_version_fields(data, ["version", "versions[].version"])
        {'version': '1', 'versions': [{'version': '2'}]}
        >>> normalize_version_fields({"tool": {"versions": [{"version": 3}]}},
        ...                          "tool.versions[].version")
        {'tool': {'versions': [{'version': '3'}]}}
    """
    if not isinstance(data, dict):
        raise TypeError(f"Expected dict, got {type(data).__name__}")

    # A bare string is itself iterable; without this guard it would be
    # walked character by character and no field would ever match.
    if isinstance(field_paths, str):
        field_paths = [field_paths]

    for field_path in field_paths:
        try:
            _normalize_path(data, field_path.split("."))
        except (KeyError, TypeError, IndexError, AttributeError) as e:
            # Best effort: a malformed path (e.g. a non-string entry) must
            # not abort normalization of the remaining paths.
            logging.debug("Skipping field path '%s': %s", field_path, e)
            continue

    return data
12 changes: 12 additions & 0 deletions galaxytool-import/galaxytool-import.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import glob
import json
import os
import sys

import requests
from boltons.iterutils import remap

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from common.metadata import normalize_version_fields

GALAXY_ALL_TOOLS_METADATA = "https://raw.githubusercontent.com/galaxyproject/galaxy_codex/refs/heads/main/communities/all/resources/tools.json"
GALAXY_ALL_WORKFLOWS_METADATA = "https://raw.githubusercontent.com/galaxyproject/galaxy_codex/refs/heads/main/communities/all/resources/workflows.json"

Expand Down Expand Up @@ -73,6 +77,14 @@ def retrieve():
# store tool json in galaxy import folder
galaxy_tool_id = galaxy_tool_id.lower()
tool_cleaned = {k.replace(" ", "_"): v for k, v in tool.items()}
tool_cleaned = normalize_version_fields(
tool_cleaned,
[
"Suite_version",
"Latest_suite_conda_package_version",
"Related_Workflows[].latest_version",
],
)
save_path = os.path.join(galaxy_directory, f"{galaxy_tool_id}.galaxy.json")
with open(save_path, "w") as write_file:
json.dump(
Expand Down