research-software-ecosystem · arash77 · Jan 19, 2026 · Feb 2, 2026 · Feb 2, 2026 · Feb 2, 2026
diff --git a/bioconda-import/bioconda_importer.py b/bioconda-import/bioconda_importer.py
@@ -1,11 +1,15 @@
 #!/usr/bin/env python
 
 import os
+import sys
 import yaml
 import argparse
 from pathlib import Path
 import jinja2
 
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from common.metadata import normalize_version_fields
+
 def clean(content_path):
     import_directory = os.path.join(content_path, "imports", "bioconda")
     os.makedirs(import_directory, exist_ok=True)
@@ -56,6 +60,7 @@ def merge(conda, content_path):
     biotools_data_path = os.path.join(content_path, 'data')
     for name, data in conda.items():
         try:
+            data = normalize_version_fields(data, ["package.version"])
             package_name = data['package']['name']
             import_file_path = os.path.join(bioconda_import_path, f"bioconda_{package_name}.yaml")
             with open(import_file_path, "w") as out:

diff --git a/bioconductor-import/import.py b/bioconductor-import/import.py
@@ -2,10 +2,14 @@
 import glob
 import json
 import os
+import sys
 import requests
 import logging
 import yaml
 
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from common.metadata import normalize_version_fields
+
 # Set up logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger()
@@ -86,6 +90,7 @@ def retrieve(version, filters=None):
         path = os.path.join("imports", "bioconductor", f"{package_name}.bioconductor.json")
 
         try:
+            pack = normalize_version_fields(pack, ["Version"])
             with open(path, "w") as write_file:
                 json.dump(pack, write_file, sort_keys=True, indent=4, separators=(",", ": "))
             logger.info(f"Saved {idx}/{total_packs} - {package_name}")

diff --git a/biotools-import/import.py b/biotools-import/import.py
@@ -7,6 +7,9 @@
 import requests
 from boltons.iterutils import remap
 
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from common.metadata import normalize_version_fields
+
 BIOTOOLS_DOMAIN = "https://bio.tools"
 SSL_VERIFY = True
 
@@ -49,6 +52,10 @@ def retrieve(filters=None):
             with open(os.path.join(directory, tpe_id + ".biotools.json"), "w") as write_file:
                 drop_false = lambda path, key, value: bool(value)
                 tool_cleaned = remap(tool, visit=drop_false)
+                tool_cleaned = normalize_version_fields(
+                    tool_cleaned, ["version"]
+                )
+
                 json.dump(
                     tool_cleaned, write_file, sort_keys=True, indent=4, separators=(",", ": ")
                 )

diff --git a/common/metadata.py b/common/metadata.py
@@ -0,0 +1,105 @@
+import logging
+
+
+def normalize_version_to_string(value):
+    """
+    Return ``value`` with every numeric version component rendered as text.
+
+    Integers and floats become their ``str()`` form. Lists and dicts are
+    rebuilt with the same rule applied to each element or value (dict keys
+    are kept as they are), at any nesting depth. Everything else, including
+    ``None``, booleans and existing strings, is handed back untouched.
+
+    Args:
+        value: The value to normalize. Can be any type.
+
+    Returns:
+        - A string for int and float input
+        - A new list for list input, with each element normalized
+        - A new dict for dict input, with each value normalized
+        - The original object for None, bool and any other type
+
+    Examples:
+        >>> normalize_version_to_string(1)
+        '1'
+        >>> normalize_version_to_string([1, 2, 3])
+        ['1', '2', '3']
+        >>> normalize_version_to_string({'version': 1.5})
+        {'version': '1.5'}
+    """
+    if isinstance(value, dict):
+        return {
+            key: normalize_version_to_string(item) for key, item in value.items()
+        }
+    if isinstance(value, list):
+        return list(map(normalize_version_to_string, value))
+    # bool is a subclass of int, so it has to be excluded explicitly.
+    is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
+    return str(value) if is_number else value
+
+
+def _normalize_path(node, segments):
+    """Normalize, in place, the value reached from node by following segments."""
+    head, rest = segments[0], segments[1:]
+    is_list = head.endswith("[]")
+    key = head[:-2] if is_list else head
+    if not isinstance(node, dict) or key not in node:
+        return  # The path does not exist in this branch; nothing to do.
+    child = node[key]
+    if is_list and not isinstance(child, list):
+        return  # "key[]" only applies when the stored value is a list.
+    if not rest:
+        node[key] = normalize_version_to_string(child)
+    elif is_list:
+        for item in child:
+            _normalize_path(item, rest)
+    else:
+        _normalize_path(child, rest)
+
+
+def normalize_version_fields(data, field_paths):
+    """
+    Normalize version fields to strings in a data dictionary.
+
+    Each path is split on "." and followed through nested dicts; a segment
+    ending in "[]" steps into every item of the list stored under that key.
+    The value at the end of the path is converted with
+    normalize_version_to_string. Paths that do not match the data are skipped.
+
+    Args:
+        data (dict): The dictionary to process. It is modified in place.
+        field_paths (str or iterable): One path string or an iterable of them.
+            Supported forms, which may be combined freely:
+            - Simple fields: "version"
+            - Nested fields: "tool.version"
+            - List fields: "versions[]"
+            - List item nested fields: "versions[].version"
+            - Deeper combinations: "tool.versions[].meta.version"
+
+    Returns:
+        dict: The same data dictionary, with the addressed fields normalized.
+
+    Raises:
+        TypeError: If data is not a dictionary.
+
+    Examples:
+        >>> data = {"version": 1, "versions": [{"version": 2}]}
+        >>> normalize_version_fields(data, ["version", "versions[].version"])
+        {'version': '1', 'versions': [{'version': '2'}]}
+        >>> normalize_version_fields({"a": {"b": [{"v": 3}]}}, "a.b[].v")
+        {'a': {'b': [{'v': '3'}]}}
+    """
+    if not isinstance(data, dict):
+        raise TypeError(f"Expected dict, got {type(data).__name__}")
+
+    # A bare string would otherwise be iterated character by character.
+    if isinstance(field_paths, str):
+        field_paths = [field_paths]
+
+    for field_path in field_paths:
+        if not isinstance(field_path, str):
+            logging.debug("Skipping non-string field path %r", field_path)
+            continue
+        _normalize_path(data, field_path.split("."))
+
+    return data
diff --git a/galaxytool-import/galaxytool-import.py b/galaxytool-import/galaxytool-import.py
@@ -1,10 +1,14 @@
 import glob
 import json
 import os
+import sys
 
 import requests
 from boltons.iterutils import remap
 
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from common.metadata import normalize_version_fields
+
 GALAXY_ALL_TOOLS_METADATA = "https://raw.githubusercontent.com/galaxyproject/galaxy_codex/refs/heads/main/communities/all/resources/tools.json"
 GALAXY_ALL_WORKFLOWS_METADATA = "https://raw.githubusercontent.com/galaxyproject/galaxy_codex/refs/heads/main/communities/all/resources/workflows.json"
 
@@ -73,6 +77,14 @@ def retrieve():
         # store tool json in galaxy import folder
         galaxy_tool_id = galaxy_tool_id.lower()
         tool_cleaned = {k.replace(" ", "_"): v for k, v in tool.items()}
+        tool_cleaned = normalize_version_fields(
+            tool_cleaned,
+            [
+                "Suite_version",
+                "Latest_suite_conda_package_version",
+                "Related_Workflows[].latest_version",
+            ],
+        )
         save_path = os.path.join(galaxy_directory, f"{galaxy_tool_id}.galaxy.json")
         with open(save_path, "w") as write_file:
             json.dump(