Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 22 additions & 22 deletions fairscape_models/conversion/models/AIReady.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class SubCriterionScore(BaseModel):

class FairnessScore(BaseModel):
findable: SubCriterionScore = Field(default_factory=lambda: SubCriterionScore(
has_content=False, details="No persistent identifier found"
has_content=False, details="No persistent identifier found. To add an identifier, set 'identifier' (for DOI) or '@id' in root dataset"
))
accessible: SubCriterionScore = Field(default_factory=lambda: SubCriterionScore(
has_content=True, details="The RO-Crate's JSON-LD metadata is machine-readable and publicly accessible by design."
Expand All @@ -21,85 +21,85 @@ class FairnessScore(BaseModel):
has_content=True, details="The dataset uses the schema.org vocabulary within the RO-Crate framework and conforms to the Croissant RAI specification for interoperability."
))
reusable: SubCriterionScore = Field(default_factory=lambda: SubCriterionScore(
has_content=False, details="No license specified"
has_content=False, details="No license specified. To add a license, set 'license' in root dataset"
))

class ProvenanceScore(BaseModel):
transparent: SubCriterionScore = Field(default_factory=lambda: SubCriterionScore(
has_content=False, details="No root datasets identified"
has_content=False, details="No root datasets identified. To document datasets, add entities with @type 'Dataset' to metadata graph"
))
traceable: SubCriterionScore = Field(default_factory=lambda: SubCriterionScore(
has_content=False, details="No transformation steps documented"
has_content=False, details="No transformation steps documented. To document workflows, add entities with @type 'Computation' or 'Experiment' to metadata graph"
))
interpretable: SubCriterionScore = Field(default_factory=lambda: SubCriterionScore(
has_content=False, details="No software documented"
has_content=False, details="No software documented. To document software, add entities with @type 'Software' to metadata graph"
))
key_actors_identified: SubCriterionScore = Field(default_factory=lambda: SubCriterionScore(
has_content=False, details="No key actors identified"
has_content=False, details="No key actors identified. To add actors, set 'author', 'publisher', or 'principalInvestigator' in root dataset"
))

class CharacterizationScore(BaseModel):
semantics: SubCriterionScore = Field(default_factory=lambda: SubCriterionScore(
has_content=True, details="Data is semantically described using the schema.org vocabulary within a machine-readable RO-Crate."
))
statistics: SubCriterionScore = Field(default_factory=lambda: SubCriterionScore(
has_content=False, details="No statistical characterization available"
has_content=False, details="No statistical characterization available. To add statistics, set 'contentSize' and/or 'hasSummaryStatistics' in Dataset/ROCrate entities"
))
standards: SubCriterionScore = Field(default_factory=lambda: SubCriterionScore(
has_content=False, details="No schemas provided for datasets."
has_content=False, details="No schemas provided for datasets. To document schemas, add entities with @type 'schema' to metadata graph"
))
potential_sources_of_bias: SubCriterionScore = Field(default_factory=lambda: SubCriterionScore(
has_content=False, details="No bias description provided"
has_content=False, details="No bias description provided. To document biases, set 'rai:dataBiases' in root dataset"
))
data_quality: SubCriterionScore = Field(default_factory=lambda: SubCriterionScore(
has_content=False, details="Data quality procedures not documented"
has_content=False, details="Data quality procedures not documented. To document quality, set 'rai:dataCollectionMissingData' in root dataset"
))

class PreModelExplainabilityScore(BaseModel):
data_documentation_template: SubCriterionScore = Field(default_factory=lambda: SubCriterionScore(
has_content=True, details="Documentation is provided via the RO-Crate's structured JSON-LD metadata, this HTML Datasheet, and Croissant RAI properties."
))
fit_for_purpose: SubCriterionScore = Field(default_factory=lambda: SubCriterionScore(
has_content=False, details="No use cases or limitations specified"
has_content=False, details="No use cases or limitations specified. To document purpose, set 'rai:dataUseCases' and/or 'rai:dataLimitations' in root dataset"
))
verifiable: SubCriterionScore = Field(default_factory=lambda: SubCriterionScore(
has_content=False, details="No files to verify"
has_content=False, details="No checksums available. To add checksums for verification, set 'md5' or 'MD5' in Dataset/Software/ROCrate entities"
))

class EthicsScore(BaseModel):
ethically_acquired: SubCriterionScore = Field(default_factory=lambda: SubCriterionScore(
has_content=False, details="No ethical acquisition information"
has_content=False, details="No ethical acquisition information. To document data collection, set 'rai:dataCollection' and/or additionalProperty with name='Human Subject' in root dataset"
))
ethically_managed: SubCriterionScore = Field(default_factory=lambda: SubCriterionScore(
has_content=False, details="No ethical management information"
has_content=False, details="No ethical management information. To document ethical oversight, set 'ethicalReview' and/or additionalProperty with name='Data Governance Committee' in root dataset"
))
ethically_disseminated: SubCriterionScore = Field(default_factory=lambda: SubCriterionScore(
has_content=False, details="No dissemination controls specified"
has_content=False, details="No dissemination controls specified. To document usage controls, set 'license', 'rai:personalSensitiveInformation', and/or additionalProperty with name='Prohibited Uses' in root dataset"
))
secure: SubCriterionScore = Field(default_factory=lambda: SubCriterionScore(
has_content=False, details="No security requirements specified"
has_content=False, details="No security requirements specified. To document security level, set 'confidentialityLevel' in root dataset"
))

class SustainabilityScore(BaseModel):
persistent: SubCriterionScore = Field(default_factory=lambda: SubCriterionScore(
has_content=False, details="No persistent identifier found"
has_content=False, details="No persistent identifier found. To add an identifier, set 'identifier' (for DOI) or '@id' in root dataset"
))
domain_appropriate: SubCriterionScore = Field(default_factory=lambda: SubCriterionScore(
has_content=False, details="No maintenance plan specified"
has_content=False, details="Data release plan not documented. To add a release plan, set 'rai:dataReleaseMaintenancePlan' in root dataset"
))
well_governed: SubCriterionScore = Field(default_factory=lambda: SubCriterionScore(
has_content=False, details="No governance structure specified"
has_content=False, details="No governance structure specified. To document governance, set additionalProperty with name='Data Governance Committee' in root dataset"
))
associated: SubCriterionScore = Field(default_factory=lambda: SubCriterionScore(
has_content=True, details="All data, software, and computations are explicitly linked within the RO-Crate's provenance graph."
))

class ComputabilityScore(BaseModel):
standardized: SubCriterionScore = Field(default_factory=lambda: SubCriterionScore(
has_content=False, details="No format information available"
has_content=False, details="No format information available. To document file formats, set 'format' in Dataset/Software entities"
))
computationally_accessible: SubCriterionScore = Field(default_factory=lambda: SubCriterionScore(
has_content=False, details="No publisher provided."
has_content=False, details="No publisher provided. To specify publisher, set 'publisher' in root dataset"
))
portable: SubCriterionScore = Field(default_factory=lambda: SubCriterionScore(
has_content=True, details="The dataset is packaged as a self-contained RO-Crate, a standard designed for portability across systems."
Expand All @@ -109,7 +109,7 @@ class ComputabilityScore(BaseModel):
))

class AIReadyScore(BaseModel):
name: str
name: str = "AI-Ready Score"
fairness: FairnessScore = Field(default_factory=FairnessScore)
provenance: ProvenanceScore = Field(default_factory=ProvenanceScore)
characterization: CharacterizationScore = Field(default_factory=CharacterizationScore)
Expand Down
1 change: 0 additions & 1 deletion fairscape_models/rocrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@ class ROCrateMetadataElem(BaseModel):
rai_personal_sensitive_information: Optional[List[str]] = Field(alias="rai:personalSensitiveInformation", default=None)
rai_data_social_impact: Optional[str] = Field(alias="rai:dataSocialImpact", default=None)
rai_annotations_per_item: Optional[str] = Field(alias="rai:annotationsPerItem", default=None)
rai_annotator_demographics: Optional[List[str]] = Field(alias="rai:annotatorDemographics", default=None)
rai_machine_annotation_tools: Optional[List[str]] = Field(alias="rai:machineAnnotationTools", default=None)

# Aggregated metrics for AI-Ready scoring (roll-up properties from sub-crates)
Expand Down