diff --git a/.gitignore b/.gitignore index b7faf40..62198fd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,12 @@ +# added +.MyNotes/ +.MyNotes/* + + +# Generated by setuptools-scm (do not commit) +src/jflat/_version.py + + # Byte-compiled / optimized / DLL files __pycache__/ *.py[codz] diff --git a/README.md b/README.md index a9b5b86..3c57412 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,27 @@ +*** # jflat Utility functions to transform nested JSON schemas into a flat list of dictionaries. + +*** +# jflat + +A small Python utility to transform **nested JSON objects** into a **flat dictionary** with underscore‐separated keys. + +This project demonstrates: +- A clean `src/`-based Python package layout +- A minimal Pydantic schema example +- Conversion from nested Pydantic model output to a flat JSON using `flatten_json` +- Example schema JSON generation (`examples/schema_json.json`) +- Pytest-based unit tests + +--- + +## 📦 Installation (development mode) + +Clone the repository and install in editable mode: + +```bash +pip install -e . +``` + +*** diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..d925e43 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,40 @@ +[build-system] +requires = ["setuptools>=61.0.0", "wheel", "setuptools-scm"] +build-backend = "setuptools.build_meta" + +[project] +name = "jflat" +dynamic = ["version"] +description = "Utility functions to flatten nested JSON into a flat dictionary."
+readme = "README.md" +authors = [ + { name = "Maxim Köppel" } +] +requires-python = ">=3.10" + +dependencies = [ + "pydantic>=2.0" +] + +classifiers = [ + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", +] + +[project.urls] +repository = "https://github.com/BAMResearch/jflat" +homepage = "https://github.com/BAMResearch/jflat" + +[tool.setuptools] +package-dir = { "" = "src" } + +[tool.setuptools.packages.find] +where = ["src"] + +[tool.setuptools_scm] +write_to = "src/jflat/_version.py" + +[tool.pytest.ini_options] +pythonpath = [ + "src" +] \ No newline at end of file diff --git a/src/examples/export_schema.py b/src/examples/export_schema.py new file mode 100644 index 0000000..92c0f2b --- /dev/null +++ b/src/examples/export_schema.py @@ -0,0 +1,15 @@ +from schema import Method +import json + +""" +File to create a JSON Schema (stored in `./examples/schema_json.json`) based on the example data model defined +in `./examples/schema.py`. + +This JSON Schema is produced by calling `Method.model_json_schema()`, where `Method` is defined in `schema.py`.
+""" + +schema = Method.model_json_schema() + +with open("schema_json.json", "w") as f: + json.dump(schema, f, indent=2) + \ No newline at end of file diff --git a/src/examples/schema.py b/src/examples/schema.py new file mode 100644 index 0000000..31ecad4 --- /dev/null +++ b/src/examples/schema.py @@ -0,0 +1,68 @@ +from pydantic import BaseModel, Field + +class Example(BaseModel): + id: str = Field(..., description="The unique identifier") + + +class Person(BaseModel): + name: str = Field(..., description="The person's name") + age: int = Field(..., ge=0, description="The person's age in years") + example: Example + + +class BaseMethod(BaseModel): + author: str = Field(..., description="The author of the method") + + +class Method(BaseMethod): + method_name: str = Field(..., description="The name of the method") + person: Person + + +#ToDo Maybe you can define more types in this example? Something like str | None, and create more varied Fields (so the printed JSON schema is actually richer and we can cover more cases) + + +""" +src/jflat/jflat.py def flatten_json(data: Dict[str, Any], parent_key: str = "") -> Dict[str, Any]: + +The goal of this function is more to map a JSON Schema (as printed by pydantic) to our new flattened version. The end result should look something like: + +{ + "$defs": { + "Method": { + "properties": { + "author": { + "description": "The author of the method", + "type": "string" + }, + "method_name": { + "description": "The name of the method", + "type": "string" + }, + "person": { + "$ref": "#/$defs/Person" + } + }, + "description": "......", + "$inherits_from": "#/$defs/BaseMethod", + }, + +}} +As you can see, I slightly modified the resulting JSON schema when printed using model_json_schema. The idea is to get rid of unnecessary stuff and add some other info.
I: + +Deleted title in each property +Deleted title in each object +Added an $inherits_from key in each of the object dictionaries +Moved the Method defs inside $defs (before, it was outside because we are printing from it) +We need to add BaseMethod to define inheritances +Deleted all the required and type:object stuff +We could also: + +Add a key inside each property defining whether it is mandatory or optional (this was previously defined by required). Something like: + "author": { + "description": "The author of the method", + "type": "string", + "mandatory": true # this can also be false, if the property is optional (i.e., str | None) + }, + +""" \ No newline at end of file diff --git a/src/examples/schema_json.json b/src/examples/schema_json.json new file mode 100644 index 0000000..5eff27e --- /dev/null +++ b/src/examples/schema_json.json @@ -0,0 +1,65 @@ +{ + "$defs": { + "Example": { + "properties": { + "id": { + "description": "The unique identifier", + "title": "Id", + "type": "string" + } + }, + "required": [ + "id" + ], + "title": "Example", + "type": "object" + }, + "Person": { + "properties": { + "name": { + "description": "The person's name", + "title": "Name", + "type": "string" + }, + "age": { + "description": "The person's age in years", + "minimum": 0, + "title": "Age", + "type": "integer" + }, + "example": { + "$ref": "#/$defs/Example" + } + }, + "required": [ + "name", + "age", + "example" + ], + "title": "Person", + "type": "object" + } + }, + "properties": { + "author": { + "description": "The author of the method", + "title": "Author", + "type": "string" + }, + "method_name": { + "description": "The name of the method", + "title": "Method Name", + "type": "string" + }, + "person": { + "$ref": "#/$defs/Person" + } + }, + "required": [ + "author", + "method_name", + "person" + ], + "title": "Method", + "type": "object" +} \ No newline at end of file diff --git a/src/examples/schema_json_aiGenerated.json b/src/examples/schema_json_aiGenerated.json new
file mode 100644 index 0000000..689e8ee --- /dev/null +++ b/src/examples/schema_json_aiGenerated.json @@ -0,0 +1,76 @@ + +{ + "title": "Method", + "type": "object", + "properties": { + "author": { + "title": "Author", + "description": "The author of the method", + "type": "string" + }, + "method_name": { + "title": "Method Name", + "description": "The name of the method", + "type": "string" + }, + "person": { + "title": "Person", + "allOf": [ + { + "$ref": "#/$defs/Person" + } + ] + } + }, + "required": [ + "author", + "method_name", + "person" + ], + "$defs": { + "Example": { + "title": "Example", + "type": "object", + "properties": { + "id": { + "title": "Id", + "description": "The unique identifier", + "type": "string" + } + }, + "required": [ + "id" + ] + }, + "Person": { + "title": "Person", + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "The person's name", + "type": "string" + }, + "age": { + "title": "Age", + "description": "The person's age in years", + "type": "integer", + "minimum": 0 + }, + "example": { + "title": "Example", + "allOf": [ + { + "$ref": "#/$defs/Example" + } + ] + } + }, + "required": [ + "name", + "age", + "example" + ] + } + } +} diff --git a/src/jflat/__init__.py b/src/jflat/__init__.py new file mode 100644 index 0000000..a4f3a96 --- /dev/null +++ b/src/jflat/__init__.py @@ -0,0 +1,4 @@ + +from .jflat import flatten_json + +__all__ = ["flatten_json"] diff --git a/src/jflat/jflat.py b/src/jflat/jflat.py new file mode 100644 index 0000000..424a07e --- /dev/null +++ b/src/jflat/jflat.py @@ -0,0 +1,31 @@ +from typing import Any, Dict + + +def flatten_json(data: Dict[str, Any], parent_key: str = "") -> Dict[str, Any]: + """ + Flatten a nested JSON-like dictionary into a flat dictionary + using underscore-separated keys. 
+ + Example: + {"person": {"name": "Alice"}} + becomes: + {"person_name": "Alice"} + """ + result: Dict[str, Any] = {} + + for key, value in data.items(): + new_key = f"{parent_key}_{key}" if parent_key else key + + if isinstance(value, dict): + # Recursively flatten child dictionaries + result.update(flatten_json(value, new_key)) + else: + result[new_key] = value + + return result + + + + +def flatten_json_schema(): + pass \ No newline at end of file diff --git a/tests/test_jflat.py b/tests/test_jflat.py new file mode 100644 index 0000000..a1aebdb --- /dev/null +++ b/tests/test_jflat.py @@ -0,0 +1,25 @@ + +from jflat.jflat import flatten_json + + +def test_flatten_json_simple(): + data = {"a": 1, "b": 2} + assert flatten_json(data) == {"a": 1, "b": 2} + + +def test_flatten_json_nested(): + data = { + "person": { + "name": "Alice", + "info": { + "age": 30 + } + } + } + + flattened = flatten_json(data) + + assert flattened == { + "person_name": "Alice", + "person_info_age": 30 + }