dataforgoodfr · OlivierCoen · Apr 22, 2026 · Apr 11, 2026 · Apr 12, 2026 · Apr 13, 2026
diff --git a/.github/workflows/coordo-py.yml b/.github/workflows/coordo-py.yml
@@ -34,3 +34,6 @@ jobs:
 
       - name: Run tests
         run: uv run test.py
+
+      - name: Run pytest
+        run: uv run pytest
diff --git a/.gitignore b/.gitignore
@@ -2,4 +2,4 @@ uv.lock
 .DS_Store
 catalog/
 data/
-.vscode/
+.vscode/
diff --git a/coordo-py/README.md b/coordo-py/README.md
@@ -90,3 +90,18 @@ FROM parents LEFT OUTER JOIN children ON parents.id = children.parent_id)
  SELECT anon_1.avg_1
 FROM anon_1
 ```
+
+
+## Development
+
+### Data types
+
+#### KoboToolbox
+
+For surveys, Kobotoolbox uses the standard XLSForm format.
+Briefly, each `xlsx` file contains 3 sheets:
+- `survey`
+- `choices`
+- `settings`
+
+All information can be found at https://support.kobotoolbox.org/edit_forms_excel.html
diff --git a/coordo-py/coordo/__init__.py b/coordo-py/coordo/__init__.py
@@ -0,0 +1,23 @@
+# Copyright COORDONNÉES 2025, 2026
+# SPDX-License-Identifier: MPL-2.0
+
+import warnings
+
+
+# ignore warnings due to shadowing of Pydantic's "schema" field in "Resource"
+REGEX_TO_IGNORE = (
+    'Field name "schema" in "Resource" shadows an attribute in parent "(Base)?Model"'
+)
+warnings.filterwarnings(
+    "ignore",
+    category=UserWarning,
+    module="dplib",
+    message=REGEX_TO_IGNORE,
+)
+
+warnings.filterwarnings(
+    "ignore",
+    category=UserWarning,
+    module="coordo",
+    message=REGEX_TO_IGNORE,
+)
diff --git a/coordo-py/coordo/cli.py b/coordo-py/coordo/cli.py
@@ -5,9 +5,8 @@
 from pathlib import Path
 
 import typer
-from dplib.models.schema.foreignKey import ForeignKey, ForeignKeyReference
 
-from coordo import loaders
+from coordo.loaders import ResourceAction, KoboToolboxLoader, FileLoader
 from coordo.datapackage import DataPackage
 from coordo.sql.builder import build_query
 
@@ -83,25 +82,18 @@ def kobotoolbox(
     xlsform: Path,
     xlsdata: Path,
     package: Path = typer.Option(help="Path to the package directory"),
+    action: ResourceAction = typer.Option(help="Action to perform on resource"),
 ):
-    dp = DataPackage.from_path(package)
-    loaders.kobotoolbox.load(dp, xlsform, xlsdata)
-    dp.save()
+    KoboToolboxLoader(package, xlsform, xlsdata, action).etl()
 
 
 @load.command()
 def file(
     path: Path,
     package: Path = typer.Option(".", help="Path to the package directory"),
+    action: ResourceAction = typer.Option(help="Action to perform on resource"),
 ):
-    dp = DataPackage.from_path(package)
-    try:
-        loaders.file.load(dp, path)
-    except ValueError as e:
-        raise typer.BadParameter(
-            f"{e} Add --overwrite if you wish to continue.", param_hint="path"
-        )
-    dp.save()
+    FileLoader(package, path, action).etl()
 
 
 app.add_typer(load, name="load")
@@ -119,13 +111,27 @@ def add_foreignkey(
     dp.get_resource(
         resource,
     ).add_foreignkey(
-        ForeignKey(
-            fields=[field],
-            reference=ForeignKeyReference(
-                fields=[foreign_field],
-                resource=None if resource == foreign_resource else foreign_resource,
-            ),
-        ),
+        fields=[field],
+        foreign_fields=[foreign_field],
+        foreign_resource=foreign_resource,
+    )
+    dp.save()
+
+@app.command()
+def remove_foreignkey(
+    from_: str,
+    to: str,
+    package: Path = typer.Option(".", help="Path to the package directory"),
+):
+    dp = DataPackage.from_path(package)
+    resource, field = from_.split(".")
+    foreign_resource, foreign_field = to.split(".")
+    dp.get_resource(
+        resource,
+    ).remove_foreignkey(
+        fields=[field],
+        foreign_fields=[foreign_field],
+        foreign_resource=foreign_resource,
     )
     dp.save()
 

diff --git a/coordo-py/coordo/datapackage/package.py b/coordo-py/coordo/datapackage/package.py
@@ -40,6 +40,13 @@ def handle_path(path: str | list[str]) -> str:
     return path
 
 
+def check_resource_fields_match(res1: Resource, res2: Resource) -> None:
+    if not res1.has_same_schema_as(res2):
+        raise ValueError(
+            f"Schemas do not match for resources {res1.name!r} and {res2.name!r}"
+        )
+
+
 class DataPackage(pydantic.BaseModel):
     id: Optional[str] = None
     name: str = pydantic.Field(pattern=r"^[a-z0-9._-]+$")
@@ -91,39 +98,64 @@ def save(self):
         )
 
     def remove_resource(self, name: str) -> None:
+        """
+        Remove a resource from the package.
+        For all other resources in the current datapackage, check if they have a foreign key pointing to this resource
+        and raise error it if so.
+        Args:
+            name (str): the name of the resource to remove
+        """
+        print(f"Removing resource {name!r} from DataPackage {self.name!r}")
         resource = self.get_resource(name=name)
+        # looping over all resources in the current datapackage, other than <resource>
         for res in self.resources:
             if res.name == name:
                 continue
-            sm = safe(res, "schema")
-            if sm.foreignKeys:
-                for fk in sm.foreignKeys:
-                    assert fk.reference.resource != name, (
-                        f"Can't remove the resource {name} : {res.name} have a foreign key pointing to this resource."
-                    )
+            res_schema = safe(res, "schema")
+            if res_schema.foreignKeys:
+                for fk in res_schema.foreignKeys:
+                    if fk.reference.resource == name:
+                        # build a string containing the list of foreign key field pairs 
+                        fk_part_names_str = "\n".join(res.get_fk_names(fk))
+                        raise ValueError(
+                            f"Can't remove the resource {name!r} : {res.name!r} has a foreign key pointing to this resource. "
+                            f"Please remove the following foreign keys beforehand:\n{fk_part_names_str}"
+                        )
+        # remove the file associated with the resource
         if resource.path:
             path = handle_path(resource.path)
             Path(self._basepath / path).unlink()
+        # update resources list
         self.resources = [res for res in self.resources if res.name != name]
 
     def add_resource(self, resource: Resource) -> None:
-        if any(res.name == resource.name for res in self.resources):
+        print(f"Adding resource {resource.name!r} to package {self.name!r}")
+        if self.resource_exists(resource.name):
             raise ValueError(
-                f"A resource named {resource.name} already exists in package {self.name}."
+                f"A resource named {resource.name!r} already exists in package {self.name!r}. "
+                "Please remove the existing resource before adding a new one."
             )
-        resource._package = self
-        self.resources.append(resource)
+        else:
+            resource._package = self
+            self.resources.append(resource)
+
+    def update_resource(self, resource: Resource) -> None:
+        pass
 
     def get_resource(self, name: str) -> Resource:
-        resource = next(res for res in self.resources if res.name == name)
-        assert resource is not None, f"Resource {name} not found."
-        return resource
+        found_resources = [res for res in self.resources if res.name == name]
+        assert len(found_resources) <= 1, f"Multiple resources named {name!r} found."
+        assert len(found_resources) > 0, f"Resource {name!r} not found."
+        return found_resources[0]
 
     def write_resource(self, resource_name: str, it: Iterable[dict]):
         pass
         # resource = self.get_resource(name=resource_name)
         # schema = resource.get_schema()
 
+    def resource_exists(self, name: str) -> bool:
+        return any(res.name == name for res in self.resources)
+
     def prepare_db(self) -> tuple[duckdb.DuckDBPyConnection, sa.MetaData]:
         conn = load_conn()
 

diff --git a/coordo-py/coordo/datapackage/resource.py b/coordo-py/coordo/datapackage/resource.py
@@ -5,7 +5,7 @@
 
 import duckdb
 import pydantic
-from dplib.models import Contributor, Dialect, ForeignKey, License, Schema, Source
+from dplib.models import Contributor, Dialect, ForeignKey, ForeignKeyReference, License, Schema, Source
 from pydantic import model_validator
 
 from .db_helpers import prepare_path
@@ -50,7 +50,21 @@ def load_table(self, conn: duckdb.DuckDBPyConnection):
         query = f'CREATE VIEW "{self.name}" AS SELECT * FROM {prepare_path(self.package._basepath / self.path)}'
         conn.execute(query)
 
-    def add_foreignkey(self, fk: ForeignKey) -> None:
+    def add_foreignkey(self, fields: list[str], foreign_fields: list[str], foreign_resource: str) -> None:
+        # TODO: remove this check when addition of multiple fields at once is supported
+        if len(fields) > 1 or len(foreign_fields) > 1:
+            raise ValueError("Adding a foreign key with multiple fields is not supported yet.")
+
+        fk = ForeignKey(
+            fields=fields,
+            reference=ForeignKeyReference(
+                fields=foreign_fields,
+                resource=None if self.name == foreign_resource else foreign_resource,
+            )
+        )
+        fk_part_names_str = " & ".join(self.get_fk_names(fk))
+        print(f"Adding foreign key {fk_part_names_str}")
+
         if not self._package:
             raise ValueError("You can't add a foreign key to an orphan resource.")
         field_names = [f.name for f in self.schema.fields]
@@ -64,15 +78,43 @@ def add_foreignkey(self, fk: ForeignKey) -> None:
         )
         field_names = [f.name for f in parent_resource.schema.fields]
         for f in fk.reference.fields:
-            assert (
-                f in field_names
-            ), f"Resource {parent_resource.name} has no field named {f}"
+            assert f in field_names, (
+                f"Resource {parent_resource.name} has no field named {f}"
+            )
+        if fk in self.schema.foreignKeys:
+            raise ValueError(f"Foreign key {fk_part_names_str} already exists in resource {self.name}")
         self.schema.foreignKeys.append(fk)
 
+    def remove_foreignkey(self, fields: list[str], foreign_fields: list[str], foreign_resource: str) -> None:
+        fk = ForeignKey(
+            fields=fields,
+            reference=ForeignKeyReference(
+                fields=foreign_fields,
+                resource=None if self.name == foreign_resource else foreign_resource,
+            )
+        )
+        fk_part_names_str = " & ".join(self.get_fk_names(fk))
+        print(f"Removing foreign key {fk_part_names_str}")
+        if fk not in self.schema.foreignKeys:
+            raise ValueError(f"Foreign key {fk_part_names_str} not found in resource {self.name}")
+        self.schema.foreignKeys.remove(fk)
+
     @model_validator(mode="after")
     def check_data_or_path(self) -> Self:
         provided = [self.data, self.path]
         count = len([f for f in provided if f is not None])
         if count != 1:
             raise ValueError("Exactly one of 'data' or 'path' must be provided.")
         return self
+
+    def has_same_schema_as(self, other: Self) -> bool:
+        for attr in vars(self.schema):
+            if getattr(self.schema, attr) != getattr(other.schema, attr):
+                return False
+        return True
+
+    def get_fk_names(self, fk: ForeignKey) -> list[str]:
+        return [
+            f"'{self.name}.{field}' -> '{fk.reference.resource}.{reference_field}'"
+            for field, reference_field in zip(fk.fields, fk.reference.fields)
+        ]
diff --git a/coordo-py/coordo/loaders/__init__.py b/coordo-py/coordo/loaders/__init__.py
@@ -1,6 +1,9 @@
 # Copyright COORDONNÉES 2025, 2026
 # SPDX-License-Identifier: MPL-2.0
 
-from . import file, kobotoolbox
+from .loader import ResourceAction
+from .kobotoolbox_loader import KoboToolboxLoader
+from .file_loader import FileLoader
 
-__all__ = ["kobotoolbox", "file"]
+
+__all__ = ["ResourceAction", "KoboToolboxLoader", "FileLoader"]
diff --git a/coordo-py/coordo/loaders/file.py b/coordo-py/coordo/loaders/file.py
diff --git a/coordo-py/coordo/loaders/file_loader.py b/coordo-py/coordo/loaders/file_loader.py
@@ -0,0 +1,37 @@
+# Copyright COORDONNÉES 2025, 2026
+# SPDX-License-Identifier: MPL-2.0
+
+import shutil
+from pathlib import Path
+
+from coordo.loaders.loader import Loader, ResourceAction
+from ..datapackage import Field, Resource, Schema
+from ..datapackage.db_helpers import prepare_path, to_dp_type
+
+
+class FileLoader(Loader):
+    def __init__(self, package: Path, path: Path, action: ResourceAction):
+        super().__init__(package, action)
+        self.path = path
+
+    def extract(self):
+        query = f"SELECT * FROM {prepare_path(self.path)}"
+        conn, _ = self.dp.prepare_db()
+        rel = conn.sql(query)
+        schema = Schema()
+        for name, type in zip(rel.columns, rel.types):
+            schema.add_field(Field(name=name, **to_dp_type(type)))
+        conn.close()
+        # creating resurce for file
+        resource = Resource(
+            name=self.path.stem,
+            path=self.path.name,
+            schema=schema,
+        )
+        self.resources = [resource]
+
+    def transform(self):
+        pass
+
+    def load(self):
+        shutil.copy(self.path, self.dp._basepath / self.path.name)