Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion fairscape_models/activity.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ class Activity(BaseModel):
"""Base class for Activity types (Computation, Annotation, Experiment)"""
guid: str = Field(alias="@id")
name: str
metadataType: Optional[str] = Field(default=None, alias="@type")
metadataType: Optional[Union[List[str], str]] = Field(default=['prov:Activity'], alias="@type")
description: str = Field(min_length=10)
associatedPublication: Optional[str] = Field(default=None)
generated: Optional[List[IdentifierValue]] = Field(default=[])
Expand Down
4 changes: 2 additions & 2 deletions fairscape_models/biochem_entity.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from pydantic import BaseModel, Field, ConfigDict
from typing import Optional, List
from typing import Optional, List, Union

from fairscape_models.fairscape_base import IdentifierValue, IdentifierPropertyValue

Expand All @@ -9,7 +9,7 @@ class BioChemEntity(BaseModel):
This class can apply to Proteins, Genes, Chemical Entities, or Biological Samples
"""
guid: str = Field(alias="@id")
metadataType: Optional[str] = Field(default="BioChemEntity", alias="@type")
metadataType: Optional[Union[List[str], str]] = Field(default=['prov:Entity', 'evi:BioChemEntity'], alias="@type")
name: str
identifier: Optional[List[IdentifierPropertyValue]] = Field(default=[])
associatedDisease: Optional[IdentifierValue] = Field(default=None)
Expand Down
2 changes: 1 addition & 1 deletion fairscape_models/computation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from fairscape_models.activity import Activity

class Computation(Activity):
metadataType: Optional[str] = Field(default="https://w3id.org/EVI#Computation", alias="@type")
metadataType: Optional[Union[List[str], str]] = Field(default=['prov:Activity', "https://w3id.org/EVI#Computation"], alias="@type")
additionalType: Optional[str] = Field(default=COMPUTATION_TYPE)
runBy: Union[str, IdentifierValue]
dateCreated: str
Expand Down
2 changes: 1 addition & 1 deletion fairscape_models/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from fairscape_models.digital_object import DigitalObject

class Dataset(DigitalObject):
metadataType: Optional[str] = Field(default="https://w3id.org/EVI#Dataset", alias="@type")
metadataType: Optional[Union[List[str], str]] = Field(default=['prov:Entity', "https://w3id.org/EVI#Dataset"], alias="@type")
additionalType: Optional[str] = Field(default=DATASET_TYPE)
datePublished: str = Field(...)
keywords: List[str] = Field(...)
Expand Down
2 changes: 1 addition & 1 deletion fairscape_models/digital_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ class DigitalObject(BaseModel):
"""Base class for DigitalObject types (Dataset, Software, MLModel)"""
guid: str = Field(alias="@id")
name: str
metadataType: Optional[str] = Field(default=None, alias="@type")
metadataType: Optional[Union[List[str], str]] = Field(default=['prov:Entity', "https://w3id.org/EVI#DigitalObject"], alias="@type")
author: Union[str, IdentifierValue, List[Union[str, IdentifierValue]]]
description: str = Field(min_length=10)
version: str = Field(default="0.1.0")
Expand Down
2 changes: 1 addition & 1 deletion fairscape_models/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from fairscape_models.activity import Activity

class Experiment(Activity):
metadataType: Optional[str] = Field(default="https://w3id.org/EVI#Experiment", alias="@type")
metadataType: Optional[Union[List[str], str]] = Field(default=['prov:Activity', "https://w3id.org/EVI#Experiment"], alias="@type")
experimentType: str
runBy: Union[str, IdentifierValue]
datePerformed: str
Expand Down
2 changes: 1 addition & 1 deletion fairscape_models/instrument.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
class Instrument(BaseModel):
guid: str = Field(alias="@id")
name: str
metadataType: Optional[str] = Field(default="https://w3id.org/EVI#Instrument", alias="@type")
metadataType: Optional[Union[List[str], str]] = Field(default=['prov:Entity', "https://w3id.org/EVI#Instrument"], alias="@type")
manufacturer: str = Field(min_length=4)
model: str
description: str = Field(min_length=10)
Expand Down
2 changes: 1 addition & 1 deletion fairscape_models/mlmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from fairscape_models.digital_object import DigitalObject

class MLModel(DigitalObject):
metadataType: Optional[str] = Field(default="https://w3id.org/EVI#MLModel", alias="@type")
metadataType: Optional[Union[List[str], str]] = Field(default=['prov:Entity', "https://w3id.org/EVI#MLModel"], alias="@type")
additionalType: Optional[str] = Field(default=MLMODEL_TYPE)
dateModified: Optional[str] = Field(default=None)
fileFormat: str = Field(alias="format")
Expand Down
39 changes: 36 additions & 3 deletions fairscape_models/model_card.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
from typing import List, Optional, Union
from pydantic import BaseModel, Field, ConfigDict
from pydantic import BaseModel, Field, ConfigDict, model_validator

from fairscape_models.fairscape_base import IdentifierValue
from fairscape_models.digital_object import DigitalObject


class ModelCard(BaseModel):
class ModelCard(DigitalObject):
"""Model Card for ML models as RO-Crate Dataset elements"""

model_config = ConfigDict(extra="allow")

guid: str = Field(alias="@id")
metadataType: Union[str, List[str]] = Field(alias="@type",default="https://w3id.org/EVI#MLModel")

metadataType: Optional[Union[List[str], str]] = Field(default=['prov:Entity', "https://w3id.org/EVI#MLModel"], alias="@type")
name: str
description: str
author: Union[str, List[str]]
Expand All @@ -22,6 +24,7 @@ class ModelCard(BaseModel):
modelFormat: Optional[Union[str, List[str]]] = Field(default=None)
trainingDataset: Optional[Union[str, List[IdentifierValue]]] = Field(default=None)
generatedBy: Optional[IdentifierValue] = Field(default=None)
derivedFrom: Optional[List[IdentifierValue]] = Field(default=[])

parameters: Optional[float] = Field(default=None)
inputSize: Optional[str] = Field(default=None)
Expand All @@ -37,3 +40,33 @@ class ModelCard(BaseModel):
citation: Optional[str] = Field(default=None)

isPartOf: Optional[List[IdentifierValue]] = Field(default=[])

@model_validator(mode='after')
def populate_prov_fields(self):
    """Auto-populate PROV-O fields from EVI fields.

    Runs as an ``after`` model validator and assigns three attributes on
    the instance:

    - ``wasGeneratedBy``: ``[generatedBy]`` when ``generatedBy`` is set,
      otherwise ``[]``.
    - ``wasDerivedFrom``: mirrors ``derivedFrom``. When ``derivedFrom`` is
      empty or ``None`` and ``trainingDataset`` is set, ``derivedFrom`` is
      first filled from ``trainingDataset`` (wrapped in a list if it is a
      single value).
    - ``wasAttributedTo``: the author(s) as a list; ``[]`` when ``author``
      is falsy.

    Returns:
        The same instance, with the attributes above populated.
    """
    # Map generatedBy → prov:wasGeneratedBy
    self.wasGeneratedBy = [self.generatedBy] if self.generatedBy else []

    # An explicitly supplied, non-empty derivedFrom takes precedence.
    # ``not self.derivedFrom`` (rather than ``== []``) also covers None,
    # which the Optional annotation permits.
    if self.trainingDataset and not self.derivedFrom:
        if isinstance(self.trainingDataset, list):
            self.derivedFrom = self.trainingDataset
        else:
            self.derivedFrom = [self.trainingDataset]

    # Map derivedFrom → prov:wasDerivedFrom
    self.wasDerivedFrom = self.derivedFrom or []

    # Map author → prov:wasAttributedTo
    if not self.author:
        self.wasAttributedTo = []
    elif isinstance(self.author, str):
        self.wasAttributedTo = [self.author]
    elif isinstance(self.author, list):
        # Shallow copy so the PROV list is independent of ``author``.
        self.wasAttributedTo = list(self.author)

    return self
9 changes: 3 additions & 6 deletions fairscape_models/rocrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,16 +193,13 @@ def normalize_type(type_str):
item_type = item["@type"]

if isinstance(item_type, list):
normalized_types = [normalize_type(t) for t in item_type]
if "ROCrate" in normalized_types or "Dataset" in normalized_types:
new_graph.append(ROCrateMetadataElem.model_validate(item))
continue
item_type = item_type[-1]

elif isinstance(item_type, str):
if isinstance(item_type, str):
normalized_type = normalize_type(item_type)
model_class_to_use = type_map.get(normalized_type)

# If we found a specific class, use it. Let it raise a
# If we found a specific class, use it.
if model_class_to_use:
new_graph.append(model_class_to_use.model_validate(item))
# Only if no specific class was matched, use the generic one.
Expand Down
2 changes: 1 addition & 1 deletion fairscape_models/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
class Sample(BaseModel):
guid: str = Field(alias="@id")
name: str
metadataType: Optional[str] = Field(default="https://w3id.org/EVI#Sample", alias="@type")
metadataType: Optional[Union[List[str], str]] = Field(default=['prov:Entity', "https://w3id.org/EVI#Sample"], alias="@type")
author: Union[str, List[str]]
description: str = Field(min_length=1)
keywords: List[str] = Field(...)
Expand Down
4 changes: 2 additions & 2 deletions fairscape_models/software.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from pydantic import Field, ConfigDict, model_validator
from typing import Optional, List
from typing import Optional, List, Union

from fairscape_models.fairscape_base import IdentifierValue, SOFTWARE_TYPE
from fairscape_models.digital_object import DigitalObject

class Software(DigitalObject):
metadataType: Optional[str] = Field(default="https://w3id.org/EVI#Software", alias="@type")
metadataType: Optional[Union[List[str], str]] = Field(default=['prov:Entity', "https://w3id.org/EVI#Software"], alias="@type")
additionalType: Optional[str] = Field(default=SOFTWARE_TYPE)
dateModified: Optional[str] = None
fileFormat: str = Field(title="fileFormat", alias="format")
Expand Down
141 changes: 141 additions & 0 deletions tests/test_model_card.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import pytest
from pydantic import ValidationError
from fairscape_models.model_card import ModelCard
from fairscape_models.fairscape_base import IdentifierValue

@pytest.fixture
def model_card_minimal_data():
    """Return a fresh dict holding a minimal valid ModelCard payload."""
    payload = {
        "@id": "ark:59852/test-model-card",
        "name": "Test Model Card",
        "author": "Test Model Card Author",
        "description": "This is a test model card with sufficient description.",
        "keywords": ["machine learning", "test"],
        "version": "1.0.0",
    }
    return payload

def test_model_card_instantiation(model_card_minimal_data):
    """A minimal payload validates and the author is mirrored into PROV."""
    card = ModelCard.model_validate(model_card_minimal_data)

    assert card.guid == model_card_minimal_data["@id"]
    assert card.name == model_card_minimal_data["name"]

    # The single string author should appear once in wasAttributedTo.
    attributed = card.wasAttributedTo
    assert len(attributed) == 1
    assert isinstance(attributed[0], str)
    assert attributed[0] == model_card_minimal_data["author"]

def test_model_card_missing_required_field(model_card_minimal_data):
    """Validation must fail once the required author key is removed."""
    model_card_minimal_data.pop("author")

    with pytest.raises(ValidationError):
        ModelCard.model_validate(model_card_minimal_data)

def test_model_card_with_multiple_authors(model_card_minimal_data):
    """A list of authors is carried over to wasAttributedTo entry by entry."""
    model_card_minimal_data["author"] = ["Card Author 1", "Card Author 2"]

    card = ModelCard.model_validate(model_card_minimal_data)

    # Both authors should be present, each still a plain string.
    attributed = card.wasAttributedTo
    assert len(attributed) == 2
    assert all(isinstance(entry, str) for entry in attributed)
    author_ids = list(attributed)
    assert "Card Author 1" in author_ids
    assert "Card Author 2" in author_ids

def test_model_card_with_generated_by_single(model_card_minimal_data):
    """A single generatedBy reference is wrapped into wasGeneratedBy."""
    model_card_minimal_data["generatedBy"] = {"@id": "ark:59852/computation-1"}

    card = ModelCard.model_validate(model_card_minimal_data)

    # Expect a one-element list holding the parsed IdentifierValue.
    generated = card.wasGeneratedBy
    assert len(generated) == 1
    assert isinstance(generated[0], IdentifierValue)
    assert generated[0].guid == "ark:59852/computation-1"


def test_model_card_with_training_dataset_as_string(model_card_minimal_data):
    """A string trainingDataset feeds derivedFrom and wasDerivedFrom."""
    model_card_minimal_data["trainingDataset"] = "ark:59852/training-data"

    card = ModelCard.model_validate(model_card_minimal_data)

    # The bare string is wrapped in a list on derivedFrom ...
    derived = card.derivedFrom
    assert len(derived) == 1
    assert derived[0] == "ark:59852/training-data"
    # ... and mirrored onto the PROV field.
    assert len(card.wasDerivedFrom) == 1

def test_model_card_with_training_dataset_as_list(model_card_minimal_data):
    """A list trainingDataset feeds derivedFrom and wasDerivedFrom."""
    training_refs = [
        {"@id": "ark:59852/training-data-1"},
        {"@id": "ark:59852/training-data-2"},
    ]
    model_card_minimal_data["trainingDataset"] = training_refs

    card = ModelCard.model_validate(model_card_minimal_data)

    # Both references should land on derivedFrom as IdentifierValue objects ...
    derived = card.derivedFrom
    assert len(derived) == 2
    assert all(isinstance(entry, IdentifierValue) for entry in derived)
    # ... and be mirrored onto the PROV field.
    assert len(card.wasDerivedFrom) == 2

def test_model_card_with_derived_from(model_card_minimal_data):
    """An explicit derivedFrom is mirrored onto wasDerivedFrom."""
    model_card_minimal_data["derivedFrom"] = [{"@id": "ark:59852/model-source"}]

    card = ModelCard.model_validate(model_card_minimal_data)

    # The PROV field should contain the single parsed source reference.
    prov_sources = card.wasDerivedFrom
    assert len(prov_sources) == 1
    assert isinstance(prov_sources[0], IdentifierValue)
    assert prov_sources[0].guid == "ark:59852/model-source"

def test_model_card_derived_from_takes_precedence(model_card_minimal_data):
    """When both are given, derivedFrom is kept over trainingDataset."""
    model_card_minimal_data.update(
        {
            "trainingDataset": [{"@id": "ark:59852/training-data"}],
            "derivedFrom": [{"@id": "ark:59852/model-source"}],
        }
    )

    card = ModelCard.model_validate(model_card_minimal_data)

    # derivedFrom must still be the caller-supplied value, not the training data.
    derived = card.derivedFrom
    assert len(derived) == 1
    assert derived[0].guid == "ark:59852/model-source"

def test_model_card_edge_case_empty_author():
    """An empty author list (valid but falsy) yields an empty wasAttributedTo."""
    payload = {
        "@id": "ark:59852/test-model-card",
        "name": "Test Model Card",
        "author": [],
        "description": "This is a test model card with sufficient description.",
        "keywords": ["test"],
        "version": "1.0.0",
    }

    card = ModelCard.model_validate(payload)

    # With no authors there is nothing to attribute the card to.
    assert card.wasAttributedTo == []

def test_model_card_edge_case_no_generated_by():
    """An explicit ``generatedBy`` of None yields an empty wasGeneratedBy."""
    payload = {
        "@id": "ark:59852/test-model-card",
        "name": "Test Model Card",
        "author": "Test Author",
        "description": "This is a test model card with sufficient description.",
        "keywords": ["test"],
        "version": "1.0.0",
        "generatedBy": None,
    }

    card = ModelCard.model_validate(payload)

    # No generating activity means an empty PROV list.
    assert card.wasGeneratedBy == []