From 10f567e549c6e0289f7d2c9aa00f0098efbefeba Mon Sep 17 00:00:00 2001 From: KoeppelSoftwareEngineer Date: Tue, 20 Jan 2026 13:02:06 +0100 Subject: [PATCH 01/23] Update .gitignore --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index b7faf40..0e5701b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ +# added +.MyNotes/ +.MyNotes/* + # Byte-compiled / optimized / DLL files __pycache__/ *.py[codz] From 29ab7eadfbe9a3c4286252e7ce72c3497af7cab5 Mon Sep 17 00:00:00 2001 From: KoeppelSoftwareEngineer Date: Thu, 22 Jan 2026 10:44:34 +0100 Subject: [PATCH 02/23] Update pyproject.toml src/ tests/ --- examples/demo_schema.py | 23 +++ examples/schema.json | 7 + pyproject.toml | 18 ++ src/jflat/__init__.py | 3 + src/jflat/cli.py | 71 ++++++++ src/jflat/cli_schema.py | 94 ++++++++++ src/jflat/core/flattener.py | 109 ++++++++++++ src/jflat/core/schema_base.py | 45 +++++ src/jflat/core/schema_flattern.py | 167 ++++++++++++++++++ src/jflat/models/io.py | 58 ++++++ tests/test_src/test_core/test_flattener.py | 0 tests/test_src/test_core/test_schema_base.py | 0 .../test_core/test_schema_flattern.py | 0 tests/test_src/test_models/test_io.py | 0 14 files changed, 595 insertions(+) create mode 100644 examples/demo_schema.py create mode 100644 examples/schema.json create mode 100644 pyproject.toml create mode 100644 src/jflat/__init__.py create mode 100644 src/jflat/cli.py create mode 100644 src/jflat/cli_schema.py create mode 100644 src/jflat/core/flattener.py create mode 100644 src/jflat/core/schema_base.py create mode 100644 src/jflat/core/schema_flattern.py create mode 100644 src/jflat/models/io.py create mode 100644 tests/test_src/test_core/test_flattener.py create mode 100644 tests/test_src/test_core/test_schema_base.py create mode 100644 tests/test_src/test_core/test_schema_flattern.py create mode 100644 tests/test_src/test_models/test_io.py diff --git a/examples/demo_schema.py b/examples/demo_schema.py new file mode 100644 index 0000000..1346c93 --- /dev/null +++ b/examples/demo_schema.py @@ -0,0 +1,23 @@ +# examples/demo.py +# Minimal demo that loads a sample JSON, flattens it, and prints the result. + +import json +from pathlib import Path +from jflat.models.io import FlattenOptions, FlattenRequest +from jflat.core.flattener import flatten + +INPUT = Path(__file__).parent / "sample.json" + +with INPUT.open("r", encoding="utf-8") as f: + raw = json.load(f) + +# Build request with default options: "_" separator, preserve lists +req = FlattenRequest( + data=raw, + options=FlattenOptions(sep="_", preserve_lists=True), +) + +# Perform flattening and print +resp = flatten(req) +print("Mode:", resp.mode) +print(json.dumps(resp.result.data, ensure_ascii=False, indent=2)) \ No newline at end of file diff --git a/examples/schema.json b/examples/schema.json new file mode 100644 index 0000000..a1a2afa --- /dev/null +++ b/examples/schema.json @@ -0,0 +1,7 @@ +{ + "title": "Inception", + "director": { + "name": "Christopher Nolan", + "age": 50 + } +} diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..49d57e0 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,18 @@ + +[project] +name = "jflat" +version = "0.2.0" +requires-python = ">=3.10" + +dependencies = [ + "pydantic>=2.12,<3", +] + +[project.optional-dependencies] +dev = [ + "pytest>=8.0.0", + "pytest-cov>=5.0.0", +] + +[tool.pytest.ini_options] +addopts = "-q" diff --git a/src/jflat/__init__.py b/src/jflat/__init__.py new file mode 100644 index 0000000..e973abd --- /dev/null +++ b/src/jflat/__init__.py @@ -0,0 +1,3 @@ +# jflat/__init__.py +# Export subpackages for convenient importing +__all__ = ["core", "models"] \ No newline at end of file diff --git a/src/jflat/cli.py b/src/jflat/cli.py new file mode 100644 index 0000000..269d4df --- /dev/null +++ b/src/jflat/cli.py @@ -0,0 +1,71 @@ +# jflat/cli.py +# A tiny CLI for demos: read JSON, flatten it, print or write to a file. + +from __future__ import annotations +import argparse +import json +import sys +from pathlib import Path +from jflat.models.io import FlattenOptions, FlattenRequest +from jflat.core.flattener import flatten_to_dict + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser(description="Flatten a nested JSON file.") + parser.add_argument("input", type=Path, help="Path to input JSON file") + parser.add_argument("-o", "--output", type=Path, help="Path to write flattened JSON") + parser.add_argument("--sep", default="_", help="Key separator (default: _)") + parser.add_argument("--max-depth", type=int, default=None, help="Maximum depth to flatten") + parser.add_argument( + "--preserve-lists", + action="store_true", + help="Keep arrays as-is (default).", + ) + parser.add_argument( + "--index-lists", + action="store_true", + help="Flatten arrays by numeric index into keys.", + ) + + args = parser.parse_args(argv) + + # Validate mutually exclusive flags for list handling + if args.preserve_lists and args.index_lists: + print("Choose either --preserve-lists OR --index-lists (not both).", file=sys.stderr) + return 2 + + # Default: preserve lists + preserve_lists = True + if args.index_lists: + preserve_lists = False + + # Read input JSON file + with args.input.open("r", encoding="utf-8") as f: + raw = json.load(f) + + # Build the Pydantic request model + req = FlattenRequest( + data=raw, + options=FlattenOptions( + sep=args.sep, + max_depth=args.max_depth, + preserve_lists=preserve_lists, + ), + ) + + # Perform flattening + flat = flatten_to_dict(req).data + + # Write to file or print to stdout + if args.output: + with args.output.open("w", encoding="utf-8") as f: + json.dump(flat, f, ensure_ascii=False, indent=2) + print(f"Wrote flattened JSON to {args.output}") + else: + print(json.dumps(flat, ensure_ascii=False, indent=2)) + + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) \ No newline at end of file diff --git a/src/jflat/cli_schema.py b/src/jflat/cli_schema.py new file mode 100644 index 0000000..1a31396 --- /dev/null +++ b/src/jflat/cli_schema.py @@ -0,0 +1,94 @@ +# jflat/cli_schema.py +""" +Command Line Interface (CLI) for flattening a *Pydantic JSON Schema*. + +This file is meant to be beginner-friendly: +- It shows how to read a JSON file from disk +- It shows how to call our library code (the flattener) +- It prints the result, or optionally writes it to a file + +How to run (from repo root): + python -m jflat.cli_schema -h + python -m jflat.cli_schema examples/schema.json + python -m jflat.cli_schema examples/schema.json -o out.json +""" + +from __future__ import annotations + +import argparse +import json +from pathlib import Path + +from jflat.core.schema_flattener import PydanticSchemaFlattener + + +def main(argv: list[str] | None = None) -> int: + """ + Entry point for the CLI. + + Why do we return an int? + - In command-line programs, returning 0 means "success" + - Non-zero means "something went wrong" + This is standard for professional CLI tools and CI pipelines. + """ + parser = argparse.ArgumentParser( + prog="jflat.cli_schema", + description="Flatten a Pydantic JSON Schema ($defs/$ref) into a teaching-friendly structure.", + ) + + # Positional argument: path to schema JSON + parser.add_argument( + "schema", + type=Path, + help="Path to a JSON Schema file generated by Pydantic (MyModel.model_json_schema()).", + ) + + # Optional argument: output file + parser.add_argument( + "-o", + "--output", + type=Path, + default=None, + help="If provided, write the flattened result to this JSON file instead of printing.", + ) + + args = parser.parse_args(argv) + + # --- 1) Read the input schema JSON file --- + if not args.schema.exists(): + # Beginner-friendly error message + print(f"ERROR: file not found: {args.schema}") + return 2 + + try: + with args.schema.open("r", encoding="utf-8") as f: + schema_dict = json.load(f) + except json.JSONDecodeError as e: + print(f"ERROR: input is not valid JSON: {args.schema}") + print(f"Details: {e}") + return 2 + + # --- 2) Call our library code (flattener) --- + try: + flattener = PydanticSchemaFlattener(schema_dict) + result_json = flattener.to_json(indent=2) # JSON string for printing/writing + except Exception as e: + # In production you might use logging; for prototype we show clear errors + print("ERROR: could not flatten schema.") + print(f"Details: {e}") + return 2 + + # --- 3) Output --- + if args.output: + args.output.write_text(result_json, encoding="utf-8") + print(f"✅ Wrote flattened schema to: {args.output}") + else: + print(result_json) + + return 0 + + +# This block is the reason `python -m jflat.cli_schema ...` works: +# It runs only when executed as a script/module, not when imported in tests. +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/src/jflat/core/flattener.py b/src/jflat/core/flattener.py new file mode 100644 index 0000000..c669268 --- /dev/null +++ b/src/jflat/core/flattener.py @@ -0,0 +1,109 @@ +# jflat/core/flattener.py +# Core flattening logic. +# The public APIs accept/return Pydantic models (FlattenRequest/FlattenResponse), +# which improves clarity, validation, and maintainability. + +from __future__ import annotations +from typing import Any +from jflat.models.io import FlattenOptions, FlattenRequest, FlattenedDict, FlattenResponse + + +def _flatten( + data: Any, + *, + parent_key: str = "", + options: FlattenOptions, + depth: int = 0, +) -> dict[str, Any]: + """ + Internal recursive function that flattens 'data' into a simple dict. + + Behavior: + - dict: traverse its keys and accumulate flattened entries + - list: either preserve as list (preserve_lists=True) or index its items into keys + - primitive (str/int/float/bool/None): stored under the current parent key + - max_depth: if set and reached, stop descending and store the nested structure as-is + + Args: + data: any JSON-like structure (dict, list, primitive) + parent_key: current flattened key prefix + options: FlattenOptions (separator, depth, list handling) + depth: current recursion depth + + Returns: + A flat dict with composite keys joined by 'options.sep'. + """ + flat: dict[str, Any] = {} + sep = options.sep + + # If we hit the depth limit, stop recursion and store the value as-is. + if options.max_depth is not None and depth >= options.max_depth: + if parent_key: + flat[parent_key] = data + else: + # Root-level with depth exceeded is rare; still keep content visible. + if isinstance(data, dict): + for k, v in data.items(): + flat[k] = v + else: + flat["value"] = data + return flat + + # Handle dicts: recurse into each key/value + if isinstance(data, dict): + for k, v in data.items(): + new_key = f"{parent_key}{sep}{k}" if parent_key else k + flat.update(_flatten(v, parent_key=new_key, options=options, depth=depth + 1)) + return flat + + # Handle lists: either keep the list or index into new keys + if isinstance(data, list): + if options.preserve_lists: + # Keep the entire list under the current key + if parent_key: + flat[parent_key] = data + else: + flat["list"] = data + return flat + else: + # Flatten list items using numeric indices + for idx, item in enumerate(data): + new_key = f"{parent_key}{sep}{idx}" if parent_key else str(idx) + flat.update(_flatten(item, parent_key=new_key, options=options, depth=depth + 1)) + return flat + + # Handle primitives: store the value under the current key + if parent_key: + flat[parent_key] = data + else: + flat["value"] = data + return flat + + +def flatten_to_dict(req: FlattenRequest) -> FlattenedDict: + """ + Public API: flatten a nested JSON object to a flat dict according to options. + + Args: + req: FlattenRequest Pydantic model (data + options) + + Returns: + FlattenedDict: Pydantic model containing the flat map + """ + flat = _flatten(req.data, parent_key="", options=req.options, depth=0) + return FlattenedDict(data=flat) + + +def flatten(req: FlattenRequest) -> FlattenResponse: + """ + Higher-level API returning a formal response wrapper (future-friendly). + For now, only 'dict' mode is supported. + + Args: + req: FlattenRequest + + Returns: + FlattenResponse with 'mode' and 'result' + """ + result = flatten_to_dict(req) + return FlattenResponse(mode="dict", result=result) \ No newline at end of file diff --git a/src/jflat/core/schema_base.py b/src/jflat/core/schema_base.py new file mode 100644 index 0000000..57d7a8d --- /dev/null +++ b/src/jflat/core/schema_base.py @@ -0,0 +1,45 @@ + +# jflat/core/schema_base.py +""" +This module defines an abstract interface (contract) for schema flatteners. + +Why do we use an abstract base class (ABC)? +- It forces all flatteners to provide the same methods. +- It helps maintainability: new flatteners must match the same API. +""" + +from __future__ import annotations +from abc import ABC, abstractmethod +from typing import Any + + +class AbstractSchemaFlattener(ABC): + """ + Abstract interface for a class that accepts a JSON schema (dict) + and provides a flattened representation. + """ + + def __init__(self, schema_or_source: Any) -> None: + """ + Concrete classes must validate input in __init__. + + schema_or_source can be: + - a dict containing a JSON schema (Pydantic schema) + - or something else (e.g. Pydantic model/class) depending on implementation + """ + self._schema: dict[str, Any] = self._validate_and_build_schema(schema_or_source) + + @abstractmethod + def _validate_and_build_schema(self, schema_or_source: Any) -> dict[str, Any]: + """Validate input and return a JSON schema dictionary.""" + raise NotImplementedError + + @abstractmethod + def flatten(self) -> dict[str, Any]: + """Return the flattened dictionary in the requested output format.""" + raise NotImplementedError + + @abstractmethod + def to_json(self, *, indent: int = 2) -> str: + """Return a JSON string version (must be JSON-serializable).""" + raise NotImplementedError diff --git a/src/jflat/core/schema_flattern.py b/src/jflat/core/schema_flattern.py new file mode 100644 index 0000000..d1cd366 --- /dev/null +++ b/src/jflat/core/schema_flattern.py @@ -0,0 +1,167 @@ + +# jflat/core/schema_flattener.py +""" +Flatten a Pydantic v2 JSON Schema into a dict that: +- keeps $defs as top-level +- for each model in $defs: + - includes only fields that are NOT $ref (i.e. not other BaseModels) + - collects referenced BaseModels into $contains + - collects inheritance info into $inherits_from (from allOf) +""" + +from __future__ import annotations +from typing import Any, Optional + +import json + +try: + # We only import pydantic types optionally, + # so this class can still accept a dict without importing pydantic everywhere. + from pydantic import BaseModel +except Exception: # pragma: no cover + BaseModel = object # type: ignore + +from jflat.core.schema_base import AbstractSchemaFlattener + + +_JSON_TYPE_TO_PYTHON = { + "string": str, + "integer": int, + "number": float, + "boolean": bool, + "object": dict, + "array": list, +} + + +def _schema_type_to_python(field_schema: dict[str, Any]) -> Any: + """ + Convert JSON Schema 'type' or 'anyOf' into a Python type object. + + This is intentionally simple for a prototype: + - Handles {"type": "string"} -> str + - Handles {"type": ["string", "null"]} -> str (optional) + - Handles {"anyOf": [{"type":"string"}, {"type":"null"}]} -> str (optional) + Otherwise returns object as fallback. + """ + # anyOf often appears for Optional fields + if "anyOf" in field_schema and isinstance(field_schema["anyOf"], list): + candidates = field_schema["anyOf"] + non_null = [c for c in candidates if c.get("type") != "null"] + if non_null: + return _schema_type_to_python(non_null[0]) + return object + + t = field_schema.get("type") + + # type can be a list (e.g., ["string", "null"]) + if isinstance(t, list): + t_no_null = [x for x in t if x != "null"] + if len(t_no_null) == 1: + return _JSON_TYPE_TO_PYTHON.get(t_no_null[0], object) + return object + + if isinstance(t, str): + return _JSON_TYPE_TO_PYTHON.get(t, object) + + return object + + +class PydanticSchemaFlattener(AbstractSchemaFlattener): + """ + Accepts either: + - a dict representing a Pydantic JSON schema + - a Pydantic BaseModel class (recommended) + - a Pydantic BaseModel instance (also OK) + """ + + def _validate_and_build_schema(self, schema_or_source: Any) -> dict[str, Any]: + # Case 1: already a dict (schema) + if isinstance(schema_or_source, dict): + if "$defs" not in schema_or_source: + raise ValueError("Input dict does not look like a Pydantic JSON Schema (missing '$defs').") + return schema_or_source + + # Case 2: a Pydantic model class + if isinstance(schema_or_source, type) and hasattr(schema_or_source, "model_json_schema"): + schema = schema_or_source.model_json_schema() + if "$defs" not in schema: + raise ValueError("Generated schema has no '$defs'. Did you pass the correct model?") + return schema + + # Case 3: a Pydantic model instance + if hasattr(schema_or_source, "__class__") and hasattr(schema_or_source.__class__, "model_json_schema"): + schema = schema_or_source.__class__.model_json_schema() + if "$defs" not in schema: + raise ValueError("Generated schema has no '$defs'. Did you pass the correct model instance?") + return schema + + raise TypeError("Unsupported input. Provide a schema dict or a Pydantic BaseModel class/instance.") + + def flatten(self) -> dict[str, Any]: + schema = self._schema + defs: dict[str, Any] = schema.get("$defs", {}) + + out: dict[str, Any] = {"$defs": {}} + + for def_name, def_schema in defs.items(): + model_entry: dict[str, Any] = {} + + # 1) Detect inheritance via "allOf" + local_schema = def_schema + inherits_from: Optional[str] = None + + if "allOf" in def_schema and isinstance(def_schema["allOf"], list): + # allOf usually contains: + # - one $ref to base model + # - one object schema containing "properties" + for part in def_schema["allOf"]: + if isinstance(part, dict) and "$ref" in part: + inherits_from = part["$ref"] + if isinstance(part, dict) and ("properties" in part or part.get("type") == "object"): + local_schema = part # use the part that contains properties + + if inherits_from: + model_entry["$inherits_from"] = inherits_from + + # 2) Extract properties (fields) + props: dict[str, Any] = local_schema.get("properties", {}) if isinstance(local_schema, dict) else {} + contains: list[str] = [] + + for field_name, field_schema in props.items(): + # If field is a reference to another model => treat as "contains" + if isinstance(field_schema, dict) and "$ref" in field_schema: + ref = field_schema["$ref"] + contains.append(f"{field_name}{ref}") # matches your example: field#/$defs/Other + continue + + # Otherwise: keep this as a "normal field" + python_type = _schema_type_to_python(field_schema if isinstance(field_schema, dict) else {}) + desc = field_schema.get("description") if isinstance(field_schema, dict) else None + + field_info: dict[str, Any] = {"type": python_type} + if desc: + field_info["description"] = desc + + model_entry[field_name] = field_info + + # 3) Save contains list (even if empty for consistency) + model_entry["$contains"] = contains + + out["$defs"][def_name] = model_entry + + return out + + def to_json(self, *, indent: int = 2) -> str: + """ + JSON output cannot contain Python type objects directly. + We convert types like into the string "str". + """ + + def default_encoder(obj: Any) -> Any: + if isinstance(obj, type): + return obj.__name__ # str -> "str", int -> "int" + raise TypeError(f"Object of type {type(obj)} is not JSON serializable") + + return json.dumps(self.flatten(), indent=indent, ensure_ascii=False, default=default_encoder) + diff --git a/src/jflat/models/io.py b/src/jflat/models/io.py new file mode 100644 index 0000000..6a5498f --- /dev/null +++ b/src/jflat/models/io.py @@ -0,0 +1,58 @@ +# jflat/models/io.py +# Pydantic models define the input/output contracts and options for flattening. +# This gives type safety, validation, defaults, and clear JSON serialization. + +from __future__ import annotations +from typing import Any, Literal, Optional +from pydantic import BaseModel, Field, field_validator + + +class FlattenOptions(BaseModel): + """ + Options controlling how flattening behaves. + + Attributes: + sep: separator between nested key parts, e.g., "director_name" + max_depth: limit flattening depth (None means unlimited) + preserve_lists: + True -> leave lists unchanged (stored as lists) + False -> flatten lists by numeric index into keys (e.g., items_0_id) + """ + sep: str = Field("_", description="Separator for nested keys") + max_depth: Optional[int] = Field(default=None, ge=1, description="Maximum nesting depth to flatten") + preserve_lists: bool = Field(default=True, description="If False, lists are flattened by numeric index") + + @field_validator("sep") + @classmethod + def non_empty_sep(cls, v: str) -> str: + # Ensure separator is at least one character to avoid ambiguous keys. + if not v: + raise ValueError("Separator cannot be empty") + return v + + +class FlattenRequest(BaseModel): + """ + Input contract for flattening. + - data: your nested JSON as a Python dict + - options: behavior switches (separator, depth, list handling) + """ + data: dict[str, Any] = Field(..., description="Nested JSON object") + options: FlattenOptions = Field(default_factory=FlattenOptions) + + +class FlattenedDict(BaseModel): + """ + Output contract for dict-style flattening. + 'data' is the resulting flat map. + """ + data: dict[str, Any] = Field(..., description="Flat key-value map") + + +class FlattenResponse(BaseModel): + """ + High-level response wrapper for future extensibility. + Today, we only support mode='dict'. + """ + mode: Literal["dict"] = "dict" + result: FlattenedDict \ No newline at end of file diff --git a/tests/test_src/test_core/test_flattener.py b/tests/test_src/test_core/test_flattener.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_src/test_core/test_schema_base.py b/tests/test_src/test_core/test_schema_base.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_src/test_core/test_schema_flattern.py b/tests/test_src/test_core/test_schema_flattern.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_src/test_models/test_io.py b/tests/test_src/test_models/test_io.py new file mode 100644 index 0000000..e69de29 From 35abba2a55fbcab338cc6f12e25f1826e7b3e35c Mon Sep 17 00:00:00 2001 From: KoeppelSoftwareEngineer Date: Tue, 27 Jan 2026 14:36:31 +0100 Subject: [PATCH 03/23] schema_flattening.py --- examples/demo_schema.py | 2 +- src/jflat/core/schema_flattening.py | 464 ++++++++++++++++++++++++++++ 2 files changed, 465 insertions(+), 1 deletion(-) create mode 100644 src/jflat/core/schema_flattening.py diff --git a/examples/demo_schema.py b/examples/demo_schema.py index 1346c93..9a254f7 100644 --- a/examples/demo_schema.py +++ b/examples/demo_schema.py @@ -20,4 +20,4 @@ # Perform flattening and print resp = flatten(req) print("Mode:", resp.mode) -print(json.dumps(resp.result.data, ensure_ascii=False, indent=2)) \ No newline at end of file +print(json.dumps(resp.result.data, ensure_ascii=False, indent=2)) diff --git a/src/jflat/core/schema_flattening.py b/src/jflat/core/schema_flattening.py new file mode 100644 index 0000000..ffaf879 --- /dev/null +++ b/src/jflat/core/schema_flattening.py @@ -0,0 +1,464 @@ +""" +schema_flattening.py +===================== +A **single-file, runnable prototype** that demonstrates a didactic pipeline: + +PDF_Extractor (stub) -> dict_raw_extraction (nested) -> SchemaFlattener -> flattened outputs. + +It also supports **Pydantic v2 models** as input and generates a **flattened model-graph** +structure with the following conventions: + +- One central registry: "$defs" +- For each model: + - Primitive fields are listed as {"field": {"type": "str|int|..."}} + - Composition (nested BaseModel fields) is recorded in "$contains": + - entries like "field_name#/$defs/ModelName" + - Inheritance is recorded in "$inherits_from": + - value like "#/$defs/BaseModelName" + +The module exposes a small CLI demo: + + python schema_flattening.py --demo + +which prints two outputs: +1) `{ "$defs": { ... } }` (JSON-embeddable) +2) `{ ... }` (just the inner mapping, as explicitly requested by the user) + +All code is **commented** for teaching purposes. +""" + +from __future__ import annotations +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import Any, Dict, List, Optional, Tuple, Type, get_args, get_origin +import json +import sys + +try: + # Pydantic v2 imports + from pydantic import BaseModel, Field +except Exception: # pragma: no cover + BaseModel = object # type: ignore + def Field(*args, **kwargs): # type: ignore + return None + +# ----------------------------------------------------------------------------- +# Utilities +# ----------------------------------------------------------------------------- + +def _is_basemodel_subclass(tp: Any) -> bool: + """Return True if *tp* is a (Pydantic) BaseModel subclass. + + Handles Optional[Model], Annotated[Model, ...], etc., by peeling typing wrappers. + """ + if tp is None: + return False + + origin = get_origin(tp) + args = get_args(tp) + + # Unwrap Optional[T], list[T], Annotated[T, ...], etc., by checking typical origins + if origin in (list, List, tuple, Tuple, Optional): + if args: + return any(_is_basemodel_subclass(a) for a in args) + return False + + # Pydantic v2 BaseModel subclass check + try: + return isinstance(tp, type) and issubclass(tp, BaseModel) and (tp is not BaseModel) + except Exception: + return False + + +def _python_type_to_name(tp: Any) -> str: + """Map python types to simple JSON-friendly type names. + + If *tp* is a typing wrapper (Optional, Annotated, etc.), try to reduce it to a base type. + """ + origin = get_origin(tp) + args = get_args(tp) + + # Reduce Optional[T] -> T, List[T] -> T, Annotated[T, ...] -> T + if origin in (list, List, tuple, Tuple, Optional): + if args: + return _python_type_to_name(args[0]) + return "any" + + # Primitive/common types mapping + mapping = { + str: "str", + int: "int", + float: "float", + bool: "bool", + bytes: "bytes", + } + if tp in mapping: + return mapping[tp] + + # Datetime-like: avoid importing heavy modules; use name fallback + try: + import datetime as _dt # local import + if tp in (_dt.date, _dt.datetime, _dt.time): + return tp.__name__ + except Exception: + pass + + # If it's a BaseModel subclass, refer to model name + if _is_basemodel_subclass(tp): + return tp.__name__ + + # Fallback: best-effort readable name + return getattr(tp, "__name__", str(tp)) + + +# ----------------------------------------------------------------------------- +# Abstract Input Adapter (validates input in __init__) +# ----------------------------------------------------------------------------- + +class InputAdapter(ABC): + """Abstract adapter that validates input at construction. + + Subclasses normalize the input to an internal representation that the + flattener can consume. + """ + + def __init__(self, data: Any) -> None: + self.data = data + self._validate() + + @abstractmethod + def _validate(self) -> None: + """Validate *self.data* (raise ValueError on invalid).""" + ... + + @abstractmethod + def to_model_graph(self) -> Dict[str, Dict[str, Any]]: + """Return a *model graph* mapping: { ModelName: {"fields":..., "bases":...} } + + Shape per model entry: + { + "fields": { field_name: {"annotation": , "description": str|None } }, + "bases": [ BaseModelSubclass, ... ] + } + """ + ... + + +# ----------------------------------------------------------------------------- +# Pydantic Adapter: accepts Pydantic model classes and inspects them +# ----------------------------------------------------------------------------- + +class PydanticAdapter(InputAdapter): + def _validate(self) -> None: + # Accept a single BaseModel subclass, a list/tuple of them, or a module-like with attributes + d = self.data + + def _collect_from_module(mod: Any) -> List[Type[BaseModel]]: + out: List[Type[BaseModel]] = [] + for name in dir(mod): + obj = getattr(mod, name) + if isinstance(obj, type): + try: + if issubclass(obj, BaseModel) and obj is not BaseModel: + out.append(obj) + except Exception: + pass + return out + + models: List[Type[BaseModel]] = [] + if isinstance(d, type): + models = [d] + elif isinstance(d, (list, tuple)): + models = [m for m in d if isinstance(m, type)] + else: + # try module-like collector as fallback + models = _collect_from_module(d) + + if not models: + raise ValueError("PydanticAdapter expects BaseModel subclasses or a module containing them.") + + self.models = models + + def to_model_graph(self) -> Dict[str, Dict[str, Any]]: + graph: Dict[str, Dict[str, Any]] = {} + for model in self.models: + fields: Dict[str, Dict[str, Any]] = {} + # pydantic v2: model.model_fields + mf = getattr(model, "model_fields", {}) + for fname, finfo in mf.items(): + ann = getattr(finfo, "annotation", Any) + desc = getattr(finfo, "description", None) + fields[fname] = {"annotation": ann, "description": desc} + + # collect BaseModel bases (for inheritance information) + bases: List[Type[BaseModel]] = [] + for b in model.__bases__: + try: + if issubclass(b, BaseModel) and b is not BaseModel: + bases.append(b) + except Exception: + pass + + graph[model.__name__] = {"fields": fields, "bases": bases} + return graph + + +# ----------------------------------------------------------------------------- +# Dictionary (already-extracted) Adapter: accepts a nested dict and interprets it +# ----------------------------------------------------------------------------- + +class DictAdapter(InputAdapter): + def _validate(self) -> None: + if not isinstance(self.data, dict): + raise ValueError("DictAdapter expects a dictionary.") + + def to_model_graph(self) -> Dict[str, Dict[str, Any]]: + """Interpret a nested dictionary as a set of models. + + Expected shape example (very flexible for demo): + { + "Simulation": { + "fields": { + "name": {"type": str, "description": "..."}, + "system": {"$model": "System"}, + }, + "bases": [], + }, + "System": { + "fields": { + "name": {"type": str}, + }, + "bases": ["BaseSystem"], + }, + "BaseSystem": { "fields": {"id": {"type": str}}, "bases": []} + } + The adapter is permissive; it treats `{ "type": |"str" }` as primitives + and `{ "$model": "ModelName" }` as a BaseModel composition. + """ + raw: Dict[str, Any] = self.data + graph: Dict[str, Dict[str, Any]] = {} + + for model_name, content in raw.items(): + fields_spec = content.get("fields", {}) if isinstance(content, dict) else {} + bases_spec = content.get("bases", []) if isinstance(content, dict) else [] + + # normalize fields into annotation-like data + fields: Dict[str, Dict[str, Any]] = {} + for fname, meta in fields_spec.items(): + if isinstance(meta, dict) and "$model" in meta: + # composition + fields[fname] = {"annotation": meta["$model"], "description": meta.get("description")} + else: + # primitive + tp = meta.get("type", "any") if isinstance(meta, dict) else meta + fields[fname] = {"annotation": tp, "description": meta.get("description") if isinstance(meta, dict) else None} + + # normalize bases + bases: List[Any] = list(bases_spec) if isinstance(bases_spec, (list, tuple)) else [] + + graph[model_name] = {"fields": fields, "bases": bases} + + return graph + + +# ----------------------------------------------------------------------------- +# Flattener +# ----------------------------------------------------------------------------- + +@dataclass +class FlattenOptions: + include_defs_wrapper: bool = True # if True, return {"$defs": {...}}; else return {...} + + +class SchemaFlattener: + """Flatten a *model graph* (from an InputAdapter) into the requested structure. + + The result follows the conventions described at the top of this file. + """ + + def __init__(self, adapter: InputAdapter, options: Optional[FlattenOptions] = None) -> None: + self.adapter = adapter + self.options = options or FlattenOptions() + + def _normalize_annotation_to_str(self, ann: Any) -> Tuple[bool, str]: + """Return (is_model, name) where: + - is_model: True if this is another model reference (composition) + - name: "str|int|..." for primitives, or ModelName for model refs + For DictAdapter, annotations can be strings model names. + For PydanticAdapter, annotations are python types. + """ + # If annotation is a string, assume it's a model name (dict adapter composition) + if isinstance(ann, str): + return True, ann + + if _is_basemodel_subclass(ann): + return True, _python_type_to_name(ann) + + # Primitive or other typing + return False, _python_type_to_name(ann) + + def flatten(self) -> Dict[str, Any]: + graph = self.adapter.to_model_graph() + defs: Dict[str, Any] = {} + + # Helper for inheritance: map model -> first BaseModel base name (if any) + def _first_base_name(bases: List[Any]) -> Optional[str]: + for b in bases: + if isinstance(b, str): # DictAdapter + return b + try: + if issubclass(b, BaseModel) and b is not BaseModel: + return b.__name__ + except Exception: + pass + return None + + for model_name, meta in graph.items(): + fields: Dict[str, Dict[str, Any]] = meta.get("fields", {}) + bases: List[Any] = meta.get("bases", []) + + out_entry: Dict[str, Any] = {"$contains": []} + + # Collect fields: primitives vs model-composition + for fname, finfo in fields.items(): + ann = finfo.get("annotation") + is_model, tname = self._normalize_annotation_to_str(ann) + if is_model: + # composition reference + out_entry["$contains"].append(f"{fname}#/$defs/{tname}") + else: + out_entry[fname] = {"type": tname} + + # Inheritance + base_name = _first_base_name(bases) + if base_name: + out_entry["$inherits_from"] = f"#/$defs/{base_name}" + + defs[model_name] = out_entry + + if self.options.include_defs_wrapper: + return {"$defs": defs} + return defs + + +# ----------------------------------------------------------------------------- +# Demo Pydantic models (from the whiteboard / prompt wording) +# ----------------------------------------------------------------------------- + +class BaseSystem(BaseModel): + id: str = Field(..., description="Unique system identifier") + +class System(BaseSystem): + name: str = Field(..., description="System name") + chem_formula: str = Field(..., description="Chemical formula") + +class Person(BaseModel): + name: str = Field(..., description="Person name") + +class Method(BaseModel): + name: str = Field(..., description="Method name") + person: Person + +class Simulation(BaseModel): + name: str + run_time: float + run_time_unit: str + system: System + method: Method + + +# ----------------------------------------------------------------------------- +# PDF_Extractor (stub): returns a nested dict resembling a raw extraction +# ----------------------------------------------------------------------------- + +class PDF_Extractor: + """Simplified PDF extractor. + + For this prototype, we don't parse a real PDF; we expose a `extract()` + method returning a *nested* dictionary as if parsed from a PDF. In a real + project, you'd wire PyPDF2 or other libs to parse the document. + """ + + def __init__(self, source: Optional[str] = None) -> None: + self.source = source # could be a filepath or in-memory bytes + + def extract(self) -> Dict[str, Any]: + # Synthetic nested structure for demo purposes only + return { + "Simulation": { + "fields": { + "name": {"type": str}, + "run_time": {"type": float}, + "run_time_unit": {"type": str}, + "system": {"$model": "System"}, + "method": {"$model": "Method"}, + }, + "bases": [], + }, + "BaseSystem": { + "fields": {"id": {"type": str}}, + "bases": [], + }, + "System": { + "fields": { + "name": {"type": str}, + "chem_formula": {"type": str}, + }, + "bases": ["BaseSystem"], + }, + "Method": { + "fields": { + "name": {"type": str}, + "person": {"$model": "Person"}, + }, + "bases": [], + }, + "Person": { + "fields": {"name": {"type": str}}, + "bases": [], + }, + } + + +# ----------------------------------------------------------------------------- +# CLI Demo +# ----------------------------------------------------------------------------- + +def _demo() -> None: + print("\n=== DEMO: Flatten from Pydantic models ===\n") + padapter = PydanticAdapter([Simulation, System, BaseSystem, Method, Person]) + + # 1) with $defs wrapper + flt = SchemaFlattener(padapter, options=FlattenOptions(include_defs_wrapper=True)) + out_with_defs = flt.flatten() + print(json.dumps(out_with_defs, indent=2)) + + # 2) bare mapping (required by the prompt to also output) + print("\n--- Bare mapping (no $defs wrapper) ---\n") + flt2 = SchemaFlattener(padapter, options=FlattenOptions(include_defs_wrapper=False)) + out_bare = flt2.flatten() + print(json.dumps(out_bare, indent=2)) + + # 3) Simulate PDF extraction path using DictAdapter + print("\n=== DEMO: Flatten from PDF_Extractor (DictAdapter) ===\n") + raw = PDF_Extractor().extract() + dadapter = DictAdapter(raw) + flt3 = SchemaFlattener(dadapter, options=FlattenOptions(include_defs_wrapper=True)) + print(json.dumps(flt3.flatten(), indent=2)) + + +def main(argv: List[str]) -> int: + if "--demo" in argv: + _demo() + return 0 + + # Default behavior: show short help + print( + "Usage: python schema_flattening.py --demo\n" + "Runs a demo that prints the flattened structures (both variants)." + ) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv[1:])) From 27eb1aa1d528c70dcff939ab7f6d51cc84de956e Mon Sep 17 00:00:00 2001 From: KoeppelSoftwareEngineer Date: Tue, 27 Jan 2026 14:41:54 +0100 Subject: [PATCH 04/23] Lost in the Forest - Reset ~ Starting with JUST one python-script --- src/jflat/__init__.py | 3 - src/jflat/cli.py | 71 --- src/jflat/cli_schema.py | 94 ---- src/jflat/core/flattener.py | 109 ---- src/jflat/core/schema_base.py | 45 -- src/jflat/core/schema_flattening.py | 464 ------------------ src/jflat/core/schema_flattern.py | 167 ------- src/jflat/models/io.py | 58 --- tests/test_src/test_core/test_flattener.py | 0 tests/test_src/test_core/test_schema_base.py | 0 .../test_core/test_schema_flattern.py | 0 tests/test_src/test_models/test_io.py | 0 12 files changed, 1011 deletions(-) delete mode 100644 src/jflat/__init__.py delete mode 100644 src/jflat/cli.py delete mode 100644 src/jflat/cli_schema.py delete mode 100644 src/jflat/core/flattener.py delete mode 100644 src/jflat/core/schema_base.py delete mode 100644 src/jflat/core/schema_flattening.py delete mode 100644 src/jflat/core/schema_flattern.py delete mode 100644 src/jflat/models/io.py delete mode 100644 tests/test_src/test_core/test_flattener.py delete mode 100644 tests/test_src/test_core/test_schema_base.py delete mode 100644 tests/test_src/test_core/test_schema_flattern.py delete mode 100644 tests/test_src/test_models/test_io.py diff --git a/src/jflat/__init__.py b/src/jflat/__init__.py deleted file mode 100644 index e973abd..0000000 --- a/src/jflat/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# jflat/__init__.py -# Export subpackages for convenient importing -__all__ = ["core", "models"] \ No newline at end of file diff --git a/src/jflat/cli.py b/src/jflat/cli.py deleted file mode 100644 index 269d4df..0000000 --- a/src/jflat/cli.py +++ /dev/null @@ -1,71 +0,0 @@ -# jflat/cli.py -# A tiny CLI for demos: read JSON, flatten it, print or write to a file. - -from __future__ import annotations -import argparse -import json -import sys -from pathlib import Path -from jflat.models.io import FlattenOptions, FlattenRequest -from jflat.core.flattener import flatten_to_dict - - -def main(argv: list[str] | None = None) -> int: - parser = argparse.ArgumentParser(description="Flatten a nested JSON file.") - parser.add_argument("input", type=Path, help="Path to input JSON file") - parser.add_argument("-o", "--output", type=Path, help="Path to write flattened JSON") - parser.add_argument("--sep", default="_", help="Key separator (default: _)") - parser.add_argument("--max-depth", type=int, default=None, help="Maximum depth to flatten") - parser.add_argument( - "--preserve-lists", - action="store_true", - help="Keep arrays as-is (default).", - ) - parser.add_argument( - "--index-lists", - action="store_true", - help="Flatten arrays by numeric index into keys.", - ) - - args = parser.parse_args(argv) - - # Validate mutually exclusive flags for list handling - if args.preserve_lists and args.index_lists: - print("Choose either --preserve-lists OR --index-lists (not both).", file=sys.stderr) - return 2 - - # Default: preserve lists - preserve_lists = True - if args.index_lists: - preserve_lists = False - - # Read input JSON file - with args.input.open("r", encoding="utf-8") as f: - raw = json.load(f) - - # Build the Pydantic request model - req = FlattenRequest( - data=raw, - options=FlattenOptions( - sep=args.sep, - max_depth=args.max_depth, - preserve_lists=preserve_lists, - ), - ) - - # Perform flattening - flat = flatten_to_dict(req).data - - # Write to file or print to stdout - if args.output: - with args.output.open("w", encoding="utf-8") as f: - json.dump(flat, f, ensure_ascii=False, indent=2) - print(f"Wrote flattened JSON to {args.output}") - else: - print(json.dumps(flat, ensure_ascii=False, indent=2)) - - return 0 - - -if __name__ == "__main__": - raise SystemExit(main()) \ No newline at end of file diff --git a/src/jflat/cli_schema.py b/src/jflat/cli_schema.py deleted file mode 100644 index 1a31396..0000000 --- a/src/jflat/cli_schema.py +++ /dev/null @@ -1,94 +0,0 @@ -# jflat/cli_schema.py -""" -Command Line Interface (CLI) for flattening a *Pydantic JSON Schema*. - -This file is meant to be beginner-friendly: -- It shows how to read a JSON file from disk -- It shows how to call our library code (the flattener) -- It prints the result, or optionally writes it to a file - -How to run (from repo root): - python -m jflat.cli_schema -h - python -m jflat.cli_schema examples/schema.json - python -m jflat.cli_schema examples/schema.json -o out.json -""" - -from __future__ import annotations - -import argparse -import json -from pathlib import Path - -from jflat.core.schema_flattener import PydanticSchemaFlattener - - -def main(argv: list[str] | None = None) -> int: - """ - Entry point for the CLI. - - Why do we return an int? - - In command-line programs, returning 0 means "success" - - Non-zero means "something went wrong" - This is standard for professional CLI tools and CI pipelines. - """ - parser = argparse.ArgumentParser( - prog="jflat.cli_schema", - description="Flatten a Pydantic JSON Schema ($defs/$ref) into a teaching-friendly structure.", - ) - - # Positional argument: path to schema JSON - parser.add_argument( - "schema", - type=Path, - help="Path to a JSON Schema file generated by Pydantic (MyModel.model_json_schema()).", - ) - - # Optional argument: output file - parser.add_argument( - "-o", - "--output", - type=Path, - default=None, - help="If provided, write the flattened result to this JSON file instead of printing.", - ) - - args = parser.parse_args(argv) - - # --- 1) Read the input schema JSON file --- - if not args.schema.exists(): - # Beginner-friendly error message - print(f"ERROR: file not found: {args.schema}") - return 2 - - try: - with args.schema.open("r", encoding="utf-8") as f: - schema_dict = json.load(f) - except json.JSONDecodeError as e: - print(f"ERROR: input is not valid JSON: {args.schema}") - print(f"Details: {e}") - return 2 - - # --- 2) Call our library code (flattener) --- - try: - flattener = PydanticSchemaFlattener(schema_dict) - result_json = flattener.to_json(indent=2) # JSON string for printing/writing - except Exception as e: - # In production you might use logging; for prototype we show clear errors - print("ERROR: could not flatten schema.") - print(f"Details: {e}") - return 2 - - # --- 3) Output --- - if args.output: - args.output.write_text(result_json, encoding="utf-8") - print(f"✅ Wrote flattened schema to: {args.output}") - else: - print(result_json) - - return 0 - - -# This block is the reason `python -m jflat.cli_schema ...` works: -# It runs only when executed as a script/module, not when imported in tests. -if __name__ == "__main__": - raise SystemExit(main()) diff --git a/src/jflat/core/flattener.py b/src/jflat/core/flattener.py deleted file mode 100644 index c669268..0000000 --- a/src/jflat/core/flattener.py +++ /dev/null @@ -1,109 +0,0 @@ -# jflat/core/flattener.py -# Core flattening logic. -# The public APIs accept/return Pydantic models (FlattenRequest/FlattenResponse), -# which improves clarity, validation, and maintainability. - -from __future__ import annotations -from typing import Any -from jflat.models.io import FlattenOptions, FlattenRequest, FlattenedDict, FlattenResponse - - -def _flatten( - data: Any, - *, - parent_key: str = "", - options: FlattenOptions, - depth: int = 0, -) -> dict[str, Any]: - """ - Internal recursive function that flattens 'data' into a simple dict. - - Behavior: - - dict: traverse its keys and accumulate flattened entries - - list: either preserve as list (preserve_lists=True) or index its items into keys - - primitive (str/int/float/bool/None): stored under the current parent key - - max_depth: if set and reached, stop descending and store the nested structure as-is - - Args: - data: any JSON-like structure (dict, list, primitive) - parent_key: current flattened key prefix - options: FlattenOptions (separator, depth, list handling) - depth: current recursion depth - - Returns: - A flat dict with composite keys joined by 'options.sep'. - """ - flat: dict[str, Any] = {} - sep = options.sep - - # If we hit the depth limit, stop recursion and store the value as-is. - if options.max_depth is not None and depth >= options.max_depth: - if parent_key: - flat[parent_key] = data - else: - # Root-level with depth exceeded is rare; still keep content visible. - if isinstance(data, dict): - for k, v in data.items(): - flat[k] = v - else: - flat["value"] = data - return flat - - # Handle dicts: recurse into each key/value - if isinstance(data, dict): - for k, v in data.items(): - new_key = f"{parent_key}{sep}{k}" if parent_key else k - flat.update(_flatten(v, parent_key=new_key, options=options, depth=depth + 1)) - return flat - - # Handle lists: either keep the list or index into new keys - if isinstance(data, list): - if options.preserve_lists: - # Keep the entire list under the current key - if parent_key: - flat[parent_key] = data - else: - flat["list"] = data - return flat - else: - # Flatten list items using numeric indices - for idx, item in enumerate(data): - new_key = f"{parent_key}{sep}{idx}" if parent_key else str(idx) - flat.update(_flatten(item, parent_key=new_key, options=options, depth=depth + 1)) - return flat - - # Handle primitives: store the value under the current key - if parent_key: - flat[parent_key] = data - else: - flat["value"] = data - return flat - - -def flatten_to_dict(req: FlattenRequest) -> FlattenedDict: - """ - Public API: flatten a nested JSON object to a flat dict according to options. - - Args: - req: FlattenRequest Pydantic model (data + options) - - Returns: - FlattenedDict: Pydantic model containing the flat map - """ - flat = _flatten(req.data, parent_key="", options=req.options, depth=0) - return FlattenedDict(data=flat) - - -def flatten(req: FlattenRequest) -> FlattenResponse: - """ - Higher-level API returning a formal response wrapper (future-friendly). - For now, only 'dict' mode is supported. - - Args: - req: FlattenRequest - - Returns: - FlattenResponse with 'mode' and 'result' - """ - result = flatten_to_dict(req) - return FlattenResponse(mode="dict", result=result) \ No newline at end of file diff --git a/src/jflat/core/schema_base.py b/src/jflat/core/schema_base.py deleted file mode 100644 index 57d7a8d..0000000 --- a/src/jflat/core/schema_base.py +++ /dev/null @@ -1,45 +0,0 @@ - -# jflat/core/schema_base.py -""" -This module defines an abstract interface (contract) for schema flatteners. - -Why do we use an abstract base class (ABC)? -- It forces all flatteners to provide the same methods. -- It helps maintainability: new flatteners must match the same API. -""" - -from __future__ import annotations -from abc import ABC, abstractmethod -from typing import Any - - -class AbstractSchemaFlattener(ABC): - """ - Abstract interface for a class that accepts a JSON schema (dict) - and provides a flattened representation. - """ - - def __init__(self, schema_or_source: Any) -> None: - """ - Concrete classes must validate input in __init__. - - schema_or_source can be: - - a dict containing a JSON schema (Pydantic schema) - - or something else (e.g. Pydantic model/class) depending on implementation - """ - self._schema: dict[str, Any] = self._validate_and_build_schema(schema_or_source) - - @abstractmethod - def _validate_and_build_schema(self, schema_or_source: Any) -> dict[str, Any]: - """Validate input and return a JSON schema dictionary.""" - raise NotImplementedError - - @abstractmethod - def flatten(self) -> dict[str, Any]: - """Return the flattened dictionary in the requested output format.""" - raise NotImplementedError - - @abstractmethod - def to_json(self, *, indent: int = 2) -> str: - """Return a JSON string version (must be JSON-serializable).""" - raise NotImplementedError diff --git a/src/jflat/core/schema_flattening.py b/src/jflat/core/schema_flattening.py deleted file mode 100644 index ffaf879..0000000 --- a/src/jflat/core/schema_flattening.py +++ /dev/null @@ -1,464 +0,0 @@ -""" -schema_flattening.py -===================== -A **single-file, runnable prototype** that demonstrates a didactic pipeline: - -PDF_Extractor (stub) -> dict_raw_extraction (nested) -> SchemaFlattener -> flattened outputs. - -It also supports **Pydantic v2 models** as input and generates a **flattened model-graph** -structure with the following conventions: - -- One central registry: "$defs" -- For each model: - - Primitive fields are listed as {"field": {"type": "str|int|..."}} - - Composition (nested BaseModel fields) is recorded in "$contains": - - entries like "field_name#/$defs/ModelName" - - Inheritance is recorded in "$inherits_from": - - value like "#/$defs/BaseModelName" - -The module exposes a small CLI demo: - - python schema_flattening.py --demo - -which prints two outputs: -1) `{ "$defs": { ... } }` (JSON-embeddable) -2) `{ ... }` (just the inner mapping, as explicitly requested by the user) - -All code is **commented** for teaching purposes. -""" - -from __future__ import annotations -from abc import ABC, abstractmethod -from dataclasses import dataclass -from typing import Any, Dict, List, Optional, Tuple, Type, get_args, get_origin -import json -import sys - -try: - # Pydantic v2 imports - from pydantic import BaseModel, Field -except Exception: # pragma: no cover - BaseModel = object # type: ignore - def Field(*args, **kwargs): # type: ignore - return None - -# ----------------------------------------------------------------------------- -# Utilities -# ----------------------------------------------------------------------------- - -def _is_basemodel_subclass(tp: Any) -> bool: - """Return True if *tp* is a (Pydantic) BaseModel subclass. - - Handles Optional[Model], Annotated[Model, ...], etc., by peeling typing wrappers. - """ - if tp is None: - return False - - origin = get_origin(tp) - args = get_args(tp) - - # Unwrap Optional[T], list[T], Annotated[T, ...], etc., by checking typical origins - if origin in (list, List, tuple, Tuple, Optional): - if args: - return any(_is_basemodel_subclass(a) for a in args) - return False - - # Pydantic v2 BaseModel subclass check - try: - return isinstance(tp, type) and issubclass(tp, BaseModel) and (tp is not BaseModel) - except Exception: - return False - - -def _python_type_to_name(tp: Any) -> str: - """Map python types to simple JSON-friendly type names. - - If *tp* is a typing wrapper (Optional, Annotated, etc.), try to reduce it to a base type. - """ - origin = get_origin(tp) - args = get_args(tp) - - # Reduce Optional[T] -> T, List[T] -> T, Annotated[T, ...] -> T - if origin in (list, List, tuple, Tuple, Optional): - if args: - return _python_type_to_name(args[0]) - return "any" - - # Primitive/common types mapping - mapping = { - str: "str", - int: "int", - float: "float", - bool: "bool", - bytes: "bytes", - } - if tp in mapping: - return mapping[tp] - - # Datetime-like: avoid importing heavy modules; use name fallback - try: - import datetime as _dt # local import - if tp in (_dt.date, _dt.datetime, _dt.time): - return tp.__name__ - except Exception: - pass - - # If it's a BaseModel subclass, refer to model name - if _is_basemodel_subclass(tp): - return tp.__name__ - - # Fallback: best-effort readable name - return getattr(tp, "__name__", str(tp)) - - -# ----------------------------------------------------------------------------- -# Abstract Input Adapter (validates input in __init__) -# ----------------------------------------------------------------------------- - -class InputAdapter(ABC): - """Abstract adapter that validates input at construction. - - Subclasses normalize the input to an internal representation that the - flattener can consume. - """ - - def __init__(self, data: Any) -> None: - self.data = data - self._validate() - - @abstractmethod - def _validate(self) -> None: - """Validate *self.data* (raise ValueError on invalid).""" - ... - - @abstractmethod - def to_model_graph(self) -> Dict[str, Dict[str, Any]]: - """Return a *model graph* mapping: { ModelName: {"fields":..., "bases":...} } - - Shape per model entry: - { - "fields": { field_name: {"annotation": , "description": str|None } }, - "bases": [ BaseModelSubclass, ... ] - } - """ - ... - - -# ----------------------------------------------------------------------------- -# Pydantic Adapter: accepts Pydantic model classes and inspects them -# ----------------------------------------------------------------------------- - -class PydanticAdapter(InputAdapter): - def _validate(self) -> None: - # Accept a single BaseModel subclass, a list/tuple of them, or a module-like with attributes - d = self.data - - def _collect_from_module(mod: Any) -> List[Type[BaseModel]]: - out: List[Type[BaseModel]] = [] - for name in dir(mod): - obj = getattr(mod, name) - if isinstance(obj, type): - try: - if issubclass(obj, BaseModel) and obj is not BaseModel: - out.append(obj) - except Exception: - pass - return out - - models: List[Type[BaseModel]] = [] - if isinstance(d, type): - models = [d] - elif isinstance(d, (list, tuple)): - models = [m for m in d if isinstance(m, type)] - else: - # try module-like collector as fallback - models = _collect_from_module(d) - - if not models: - raise ValueError("PydanticAdapter expects BaseModel subclasses or a module containing them.") - - self.models = models - - def to_model_graph(self) -> Dict[str, Dict[str, Any]]: - graph: Dict[str, Dict[str, Any]] = {} - for model in self.models: - fields: Dict[str, Dict[str, Any]] = {} - # pydantic v2: model.model_fields - mf = getattr(model, "model_fields", {}) - for fname, finfo in mf.items(): - ann = getattr(finfo, "annotation", Any) - desc = getattr(finfo, "description", None) - fields[fname] = {"annotation": ann, "description": desc} - - # collect BaseModel bases (for inheritance information) - bases: List[Type[BaseModel]] = [] - for b in model.__bases__: - try: - if issubclass(b, BaseModel) and b is not BaseModel: - bases.append(b) - except Exception: - pass - - graph[model.__name__] = {"fields": fields, "bases": bases} - return graph - - -# ----------------------------------------------------------------------------- -# Dictionary (already-extracted) Adapter: accepts a nested dict and interprets it -# ----------------------------------------------------------------------------- - -class DictAdapter(InputAdapter): - def _validate(self) -> None: - if not isinstance(self.data, dict): - raise ValueError("DictAdapter expects a dictionary.") - - def to_model_graph(self) -> Dict[str, Dict[str, Any]]: - """Interpret a nested dictionary as a set of models. - - Expected shape example (very flexible for demo): - { - "Simulation": { - "fields": { - "name": {"type": str, "description": "..."}, - "system": {"$model": "System"}, - }, - "bases": [], - }, - "System": { - "fields": { - "name": {"type": str}, - }, - "bases": ["BaseSystem"], - }, - "BaseSystem": { "fields": {"id": {"type": str}}, "bases": []} - } - The adapter is permissive; it treats `{ "type": |"str" }` as primitives - and `{ "$model": "ModelName" }` as a BaseModel composition. - """ - raw: Dict[str, Any] = self.data - graph: Dict[str, Dict[str, Any]] = {} - - for model_name, content in raw.items(): - fields_spec = content.get("fields", {}) if isinstance(content, dict) else {} - bases_spec = content.get("bases", []) if isinstance(content, dict) else [] - - # normalize fields into annotation-like data - fields: Dict[str, Dict[str, Any]] = {} - for fname, meta in fields_spec.items(): - if isinstance(meta, dict) and "$model" in meta: - # composition - fields[fname] = {"annotation": meta["$model"], "description": meta.get("description")} - else: - # primitive - tp = meta.get("type", "any") if isinstance(meta, dict) else meta - fields[fname] = {"annotation": tp, "description": meta.get("description") if isinstance(meta, dict) else None} - - # normalize bases - bases: List[Any] = list(bases_spec) if isinstance(bases_spec, (list, tuple)) else [] - - graph[model_name] = {"fields": fields, "bases": bases} - - return graph - - -# ----------------------------------------------------------------------------- -# Flattener -# ----------------------------------------------------------------------------- - -@dataclass -class FlattenOptions: - include_defs_wrapper: bool = True # if True, return {"$defs": {...}}; else return {...} - - -class SchemaFlattener: - """Flatten a *model graph* (from an InputAdapter) into the requested structure. - - The result follows the conventions described at the top of this file. - """ - - def __init__(self, adapter: InputAdapter, options: Optional[FlattenOptions] = None) -> None: - self.adapter = adapter - self.options = options or FlattenOptions() - - def _normalize_annotation_to_str(self, ann: Any) -> Tuple[bool, str]: - """Return (is_model, name) where: - - is_model: True if this is another model reference (composition) - - name: "str|int|..." for primitives, or ModelName for model refs - For DictAdapter, annotations can be strings model names. - For PydanticAdapter, annotations are python types. - """ - # If annotation is a string, assume it's a model name (dict adapter composition) - if isinstance(ann, str): - return True, ann - - if _is_basemodel_subclass(ann): - return True, _python_type_to_name(ann) - - # Primitive or other typing - return False, _python_type_to_name(ann) - - def flatten(self) -> Dict[str, Any]: - graph = self.adapter.to_model_graph() - defs: Dict[str, Any] = {} - - # Helper for inheritance: map model -> first BaseModel base name (if any) - def _first_base_name(bases: List[Any]) -> Optional[str]: - for b in bases: - if isinstance(b, str): # DictAdapter - return b - try: - if issubclass(b, BaseModel) and b is not BaseModel: - return b.__name__ - except Exception: - pass - return None - - for model_name, meta in graph.items(): - fields: Dict[str, Dict[str, Any]] = meta.get("fields", {}) - bases: List[Any] = meta.get("bases", []) - - out_entry: Dict[str, Any] = {"$contains": []} - - # Collect fields: primitives vs model-composition - for fname, finfo in fields.items(): - ann = finfo.get("annotation") - is_model, tname = self._normalize_annotation_to_str(ann) - if is_model: - # composition reference - out_entry["$contains"].append(f"{fname}#/$defs/{tname}") - else: - out_entry[fname] = {"type": tname} - - # Inheritance - base_name = _first_base_name(bases) - if base_name: - out_entry["$inherits_from"] = f"#/$defs/{base_name}" - - defs[model_name] = out_entry - - if self.options.include_defs_wrapper: - return {"$defs": defs} - return defs - - -# ----------------------------------------------------------------------------- -# Demo Pydantic models (from the whiteboard / prompt wording) -# ----------------------------------------------------------------------------- - -class BaseSystem(BaseModel): - id: str = Field(..., description="Unique system identifier") - -class System(BaseSystem): - name: str = Field(..., description="System name") - chem_formula: str = Field(..., description="Chemical formula") - -class Person(BaseModel): - name: str = Field(..., description="Person name") - -class Method(BaseModel): - name: str = Field(..., description="Method name") - person: Person - -class Simulation(BaseModel): - name: str - run_time: float - run_time_unit: str - system: System - method: Method - - -# ----------------------------------------------------------------------------- -# PDF_Extractor (stub): returns a nested dict resembling a raw extraction -# ----------------------------------------------------------------------------- - -class PDF_Extractor: - """Simplified PDF extractor. - - For this prototype, we don't parse a real PDF; we expose a `extract()` - method returning a *nested* dictionary as if parsed from a PDF. In a real - project, you'd wire PyPDF2 or other libs to parse the document. - """ - - def __init__(self, source: Optional[str] = None) -> None: - self.source = source # could be a filepath or in-memory bytes - - def extract(self) -> Dict[str, Any]: - # Synthetic nested structure for demo purposes only - return { - "Simulation": { - "fields": { - "name": {"type": str}, - "run_time": {"type": float}, - "run_time_unit": {"type": str}, - "system": {"$model": "System"}, - "method": {"$model": "Method"}, - }, - "bases": [], - }, - "BaseSystem": { - "fields": {"id": {"type": str}}, - "bases": [], - }, - "System": { - "fields": { - "name": {"type": str}, - "chem_formula": {"type": str}, - }, - "bases": ["BaseSystem"], - }, - "Method": { - "fields": { - "name": {"type": str}, - "person": {"$model": "Person"}, - }, - "bases": [], - }, - "Person": { - "fields": {"name": {"type": str}}, - "bases": [], - }, - } - - -# ----------------------------------------------------------------------------- -# CLI Demo -# ----------------------------------------------------------------------------- - -def _demo() -> None: - print("\n=== DEMO: Flatten from Pydantic models ===\n") - padapter = PydanticAdapter([Simulation, System, BaseSystem, Method, Person]) - - # 1) with $defs wrapper - flt = SchemaFlattener(padapter, options=FlattenOptions(include_defs_wrapper=True)) - out_with_defs = flt.flatten() - print(json.dumps(out_with_defs, indent=2)) - - # 2) bare mapping (required by the prompt to also output) - print("\n--- Bare mapping (no $defs wrapper) ---\n") - flt2 = SchemaFlattener(padapter, options=FlattenOptions(include_defs_wrapper=False)) - out_bare = flt2.flatten() - print(json.dumps(out_bare, indent=2)) - - # 3) Simulate PDF extraction path using DictAdapter - print("\n=== DEMO: Flatten from PDF_Extractor (DictAdapter) ===\n") - raw = PDF_Extractor().extract() - dadapter = DictAdapter(raw) - flt3 = SchemaFlattener(dadapter, options=FlattenOptions(include_defs_wrapper=True)) - print(json.dumps(flt3.flatten(), indent=2)) - - -def main(argv: List[str]) -> int: - if "--demo" in argv: - _demo() - return 0 - - # Default behavior: show short help - print( - "Usage: python schema_flattening.py --demo\n" - "Runs a demo that prints the flattened structures (both variants)." - ) - return 0 - - -if __name__ == "__main__": - raise SystemExit(main(sys.argv[1:])) diff --git a/src/jflat/core/schema_flattern.py b/src/jflat/core/schema_flattern.py deleted file mode 100644 index d1cd366..0000000 --- a/src/jflat/core/schema_flattern.py +++ /dev/null @@ -1,167 +0,0 @@ - -# jflat/core/schema_flattener.py -""" -Flatten a Pydantic v2 JSON Schema into a dict that: -- keeps $defs as top-level -- for each model in $defs: - - includes only fields that are NOT $ref (i.e. not other BaseModels) - - collects referenced BaseModels into $contains - - collects inheritance info into $inherits_from (from allOf) -""" - -from __future__ import annotations -from typing import Any, Optional - -import json - -try: - # We only import pydantic types optionally, - # so this class can still accept a dict without importing pydantic everywhere. - from pydantic import BaseModel -except Exception: # pragma: no cover - BaseModel = object # type: ignore - -from jflat.core.schema_base import AbstractSchemaFlattener - - -_JSON_TYPE_TO_PYTHON = { - "string": str, - "integer": int, - "number": float, - "boolean": bool, - "object": dict, - "array": list, -} - - -def _schema_type_to_python(field_schema: dict[str, Any]) -> Any: - """ - Convert JSON Schema 'type' or 'anyOf' into a Python type object. - - This is intentionally simple for a prototype: - - Handles {"type": "string"} -> str - - Handles {"type": ["string", "null"]} -> str (optional) - - Handles {"anyOf": [{"type":"string"}, {"type":"null"}]} -> str (optional) - Otherwise returns object as fallback. - """ - # anyOf often appears for Optional fields - if "anyOf" in field_schema and isinstance(field_schema["anyOf"], list): - candidates = field_schema["anyOf"] - non_null = [c for c in candidates if c.get("type") != "null"] - if non_null: - return _schema_type_to_python(non_null[0]) - return object - - t = field_schema.get("type") - - # type can be a list (e.g., ["string", "null"]) - if isinstance(t, list): - t_no_null = [x for x in t if x != "null"] - if len(t_no_null) == 1: - return _JSON_TYPE_TO_PYTHON.get(t_no_null[0], object) - return object - - if isinstance(t, str): - return _JSON_TYPE_TO_PYTHON.get(t, object) - - return object - - -class PydanticSchemaFlattener(AbstractSchemaFlattener): - """ - Accepts either: - - a dict representing a Pydantic JSON schema - - a Pydantic BaseModel class (recommended) - - a Pydantic BaseModel instance (also OK) - """ - - def _validate_and_build_schema(self, schema_or_source: Any) -> dict[str, Any]: - # Case 1: already a dict (schema) - if isinstance(schema_or_source, dict): - if "$defs" not in schema_or_source: - raise ValueError("Input dict does not look like a Pydantic JSON Schema (missing '$defs').") - return schema_or_source - - # Case 2: a Pydantic model class - if isinstance(schema_or_source, type) and hasattr(schema_or_source, "model_json_schema"): - schema = schema_or_source.model_json_schema() - if "$defs" not in schema: - raise ValueError("Generated schema has no '$defs'. Did you pass the correct model?") - return schema - - # Case 3: a Pydantic model instance - if hasattr(schema_or_source, "__class__") and hasattr(schema_or_source.__class__, "model_json_schema"): - schema = schema_or_source.__class__.model_json_schema() - if "$defs" not in schema: - raise ValueError("Generated schema has no '$defs'. Did you pass the correct model instance?") - return schema - - raise TypeError("Unsupported input. Provide a schema dict or a Pydantic BaseModel class/instance.") - - def flatten(self) -> dict[str, Any]: - schema = self._schema - defs: dict[str, Any] = schema.get("$defs", {}) - - out: dict[str, Any] = {"$defs": {}} - - for def_name, def_schema in defs.items(): - model_entry: dict[str, Any] = {} - - # 1) Detect inheritance via "allOf" - local_schema = def_schema - inherits_from: Optional[str] = None - - if "allOf" in def_schema and isinstance(def_schema["allOf"], list): - # allOf usually contains: - # - one $ref to base model - # - one object schema containing "properties" - for part in def_schema["allOf"]: - if isinstance(part, dict) and "$ref" in part: - inherits_from = part["$ref"] - if isinstance(part, dict) and ("properties" in part or part.get("type") == "object"): - local_schema = part # use the part that contains properties - - if inherits_from: - model_entry["$inherits_from"] = inherits_from - - # 2) Extract properties (fields) - props: dict[str, Any] = local_schema.get("properties", {}) if isinstance(local_schema, dict) else {} - contains: list[str] = [] - - for field_name, field_schema in props.items(): - # If field is a reference to another model => treat as "contains" - if isinstance(field_schema, dict) and "$ref" in field_schema: - ref = field_schema["$ref"] - contains.append(f"{field_name}{ref}") # matches your example: field#/$defs/Other - continue - - # Otherwise: keep this as a "normal field" - python_type = _schema_type_to_python(field_schema if isinstance(field_schema, dict) else {}) - desc = field_schema.get("description") if isinstance(field_schema, dict) else None - - field_info: dict[str, Any] = {"type": python_type} - if desc: - field_info["description"] = desc - - model_entry[field_name] = field_info - - # 3) Save contains list (even if empty for consistency) - model_entry["$contains"] = contains - - out["$defs"][def_name] = model_entry - - return out - - def to_json(self, *, indent: int = 2) -> str: - """ - JSON output cannot contain Python type objects directly. - We convert types like into the string "str". - """ - - def default_encoder(obj: Any) -> Any: - if isinstance(obj, type): - return obj.__name__ # str -> "str", int -> "int" - raise TypeError(f"Object of type {type(obj)} is not JSON serializable") - - return json.dumps(self.flatten(), indent=indent, ensure_ascii=False, default=default_encoder) - diff --git a/src/jflat/models/io.py b/src/jflat/models/io.py deleted file mode 100644 index 6a5498f..0000000 --- a/src/jflat/models/io.py +++ /dev/null @@ -1,58 +0,0 @@ -# jflat/models/io.py -# Pydantic models define the input/output contracts and options for flattening. -# This gives type safety, validation, defaults, and clear JSON serialization. - -from __future__ import annotations -from typing import Any, Literal, Optional -from pydantic import BaseModel, Field, field_validator - - -class FlattenOptions(BaseModel): - """ - Options controlling how flattening behaves. - - Attributes: - sep: separator between nested key parts, e.g., "director_name" - max_depth: limit flattening depth (None means unlimited) - preserve_lists: - True -> leave lists unchanged (stored as lists) - False -> flatten lists by numeric index into keys (e.g., items_0_id) - """ - sep: str = Field("_", description="Separator for nested keys") - max_depth: Optional[int] = Field(default=None, ge=1, description="Maximum nesting depth to flatten") - preserve_lists: bool = Field(default=True, description="If False, lists are flattened by numeric index") - - @field_validator("sep") - @classmethod - def non_empty_sep(cls, v: str) -> str: - # Ensure separator is at least one character to avoid ambiguous keys. - if not v: - raise ValueError("Separator cannot be empty") - return v - - -class FlattenRequest(BaseModel): - """ - Input contract for flattening. - - data: your nested JSON as a Python dict - - options: behavior switches (separator, depth, list handling) - """ - data: dict[str, Any] = Field(..., description="Nested JSON object") - options: FlattenOptions = Field(default_factory=FlattenOptions) - - -class FlattenedDict(BaseModel): - """ - Output contract for dict-style flattening. - 'data' is the resulting flat map. - """ - data: dict[str, Any] = Field(..., description="Flat key-value map") - - -class FlattenResponse(BaseModel): - """ - High-level response wrapper for future extensibility. - Today, we only support mode='dict'. - """ - mode: Literal["dict"] = "dict" - result: FlattenedDict \ No newline at end of file diff --git a/tests/test_src/test_core/test_flattener.py b/tests/test_src/test_core/test_flattener.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/test_src/test_core/test_schema_base.py b/tests/test_src/test_core/test_schema_base.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/test_src/test_core/test_schema_flattern.py b/tests/test_src/test_core/test_schema_flattern.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/test_src/test_models/test_io.py b/tests/test_src/test_models/test_io.py deleted file mode 100644 index e69de29..0000000 From 17230f4d6a6721032e7e3df55101f9adf28b79a6 Mon Sep 17 00:00:00 2001 From: KoeppelSoftwareEngineer Date: Tue, 27 Jan 2026 15:19:08 +0100 Subject: [PATCH 05/23] Reset - Start with Skeleton --- src/jflat.py | 0 tests/test_src/test_jflat.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/jflat.py create mode 100644 tests/test_src/test_jflat.py diff --git a/src/jflat.py b/src/jflat.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_src/test_jflat.py b/tests/test_src/test_jflat.py new file mode 100644 index 0000000..e69de29 From 429133f94d67549a31c67244d012b68558b0d4ec Mon Sep 17 00:00:00 2001 From: KoeppelSoftwareEngineer Date: Tue, 27 Jan 2026 15:43:04 +0100 Subject: [PATCH 06/23] Working Code --- src/jflat.py | 102 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) diff --git a/src/jflat.py b/src/jflat.py index e69de29..51de863 100644 --- a/src/jflat.py +++ b/src/jflat.py @@ -0,0 +1,102 @@ +""" +PROJECT JFLAT + +KURZ-GOAL -> Pydantic model → .model_dump() → nested JSON/dict → JFlat → flattened dict +""" + +from typing import Any, Dict + + +class JFlat: + """ + A tiny helper class that accepts any JSON‑like dictionary + and can flatten it. + + Usage: + flat = JFlat(input_json).flatten() + print(flat) + """ + + def __init__(self, input_json: Dict[str, Any]): + if not isinstance(input_json, dict): + raise ValueError("JFlat only accepts dictionaries.") + self.input_json = input_json + + # ------------------------------------------------------------ + # Public method + # ------------------------------------------------------------ + def flatten(self) -> Dict[str, Any]: + """ + Returns a flattened dictionary. + Example: {"director": {"name": "X"}} becomes {"director_name": "X"} + """ + flat_dict: Dict[str, Any] = {} + self._flatten_recursive(self.input_json, parent_key="", output=flat_dict) + return flat_dict + + # ------------------------------------------------------------ + # Internal recursive function + # ------------------------------------------------------------ + def _flatten_recursive(self, obj: Any, parent_key: str, output: Dict[str, Any]): + """ + Recursively walks through the JSON dictionary and stores + flattened key/value pairs. + """ + if isinstance(obj, dict): + for key, value in obj.items(): + new_key = f"{parent_key}_{key}" if parent_key else key + self._flatten_recursive(value, new_key, output) + else: + output[parent_key] = obj + + +# ---------------------------------------------------------------- +# Example usage (your demo for MIT students) +# ---------------------------------------------------------------- +if __name__ == "__main__": + # A nested JSON similar to what Pydantic .model_dump() produces + # Example with a little humor: Christopher Nolan is still 50 :-) + input_json = { + "title": "Inception", + "director": { + "name": "Christopher Nolan", + "age": 50 + } + } + + print("\nINPUT JSON (nested):") + print(input_json) + + # flatten it + jflat = JFlat(input_json) + output = jflat.flatten() + + print("\nOUTPUT JSON (flattened):") + print(output) + + # Expected output: + # { + # "title": "Inception", + # "director_name": "Christopher Nolan", + # "director_age": 50 + # } + + +""" +INPUT: +{ + "pizza": { + "toppings": { + "cheese": "mozzarella", + "extra": "pineapple (controversial!)" + } + } +} + +OUPUT: +{ + "pizza_toppings_cheese": "mozzarella", + "pizza_toppings_extra": "pineapple (controversial!)" +} +""" + From da58a8d4f6e57965e3d05713154f9f3514cb9a40 Mon Sep 17 00:00:00 2001 From: KoeppelSoftwareEngineer Date: Tue, 27 Jan 2026 15:55:12 +0100 Subject: [PATCH 07/23] Update - Cleaning up the Code --- tests/test_src/playground_jflat_demo.py | 109 ++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 tests/test_src/playground_jflat_demo.py diff --git a/tests/test_src/playground_jflat_demo.py b/tests/test_src/playground_jflat_demo.py new file mode 100644 index 0000000..700152c --- /dev/null +++ b/tests/test_src/playground_jflat_demo.py @@ -0,0 +1,109 @@ +""" +execute : python jflat_demo.py + +PROJECT JFLAT — Unified Demo Script +----------------------------------- + +This script shows: +1. How Pydantic creates nested JSON and JSON schemas +2. How JFlat flattens nested JSON +3. A teachable prototype for students and non-programmers + +Run with: + python jflat_demo.py +""" + +from typing import Any, Dict +from pydantic import BaseModel, Field + + +# ================================================================ +# PART 1 — PYDANTIC MODELS (your examples) +# ================================================================ + +class Example(BaseModel): + id: str = Field(..., description="The unique identifier") + + +class Person(BaseModel): + name: str = Field(..., description="The person's name") + age: int = Field(..., ge=0, description="The person's age in years") + example: Example + + +class BaseMethod(BaseModel): + author: str = Field(..., description="The author of the method") + + +class Method(BaseMethod): + method_name: str = Field(..., description="The name of the method") + person: Person + + +# ================================================================ +# PART 2 — JFlat: A JSON flattener +# ================================================================ + +class JFlat: + """ + A tiny helper class that accepts a nested JSON-like dictionary + and flattens it into a single-level dictionary. + + Example: + {"person": {"name": "Alice"}} + becomes: + {"person_name": "Alice"} + """ + + def __init__(self, input_json: Dict[str, Any]): + if not isinstance(input_json, dict): + raise ValueError("JFlat only accepts dictionaries.") + self.input_json = input_json + + def flatten(self) -> Dict[str, Any]: + flat_dict: Dict[str, Any] = {} + self._flatten_recursive(self.input_json, parent_key="", output=flat_dict) + return flat_dict + + def _flatten_recursive(self, obj: Any, parent_key: str, output: Dict[str, Any]): + if isinstance(obj, dict): + for key, value in obj.items(): + new_key = f"{parent_key}_{key}" if parent_key else key + self._flatten_recursive(value, new_key, output) + else: + output[parent_key] = obj + + +# ================================================================ +# PART 3 — DEMO EXECUTION +# ================================================================ +if __name__ == "__main__": + print("\n=== Pydantic -> JSON SCHEMA ===") + schema = Method.model_json_schema() + print(schema) + + print("\n=== Nested JSON produced by Pydantic ===") + method_instance = Method( + author="Maxim", + method_name="FlattenSchema", + person=Person( + name="Christopher Nolan", + age=50, + example=Example(id="XYZ123") + ) + ) + + nested_json = method_instance.model_dump() + print(nested_json) + + print("\n=== Flattened using JFlat ===") + flat = JFlat(nested_json).flatten() + print(flat) + + + + + + + + From ff735eb9b6db229a43f9c3ce4a70aa499a8a8af7 Mon Sep 17 00:00:00 2001 From: KoeppelSoftwareEngineer Date: Tue, 27 Jan 2026 15:56:20 +0100 Subject: [PATCH 08/23] Cleaning the Code --- examples/demo_schema.py | 23 ----------------------- examples/schema.json | 7 ------- pyproject.toml | 18 ------------------ 3 files changed, 48 deletions(-) delete mode 100644 examples/demo_schema.py delete mode 100644 examples/schema.json diff --git a/examples/demo_schema.py b/examples/demo_schema.py deleted file mode 100644 index 9a254f7..0000000 --- a/examples/demo_schema.py +++ /dev/null @@ -1,23 +0,0 @@ -# examples/demo.py -# Minimal demo that loads a sample JSON, flattens it, and prints the result. - -import json -from pathlib import Path -from jflat.models.io import FlattenOptions, FlattenRequest -from jflat.core.flattener import flatten - -INPUT = Path(__file__).parent / "sample.json" - -with INPUT.open("r", encoding="utf-8") as f: - raw = json.load(f) - -# Build request with default options: "_" separator, preserve lists -req = FlattenRequest( - data=raw, - options=FlattenOptions(sep="_", preserve_lists=True), -) - -# Perform flattening and print -resp = flatten(req) -print("Mode:", resp.mode) -print(json.dumps(resp.result.data, ensure_ascii=False, indent=2)) diff --git a/examples/schema.json b/examples/schema.json deleted file mode 100644 index a1a2afa..0000000 --- a/examples/schema.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "title": "Inception", - "director": { - "name": "Christopher Nolan", - "age": 50 - } -} diff --git a/pyproject.toml b/pyproject.toml index 49d57e0..e69de29 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,18 +0,0 @@ - -[project] -name = "jflat" -version = "0.2.0" -requires-python = ">=3.10" - -dependencies = [ - "pydantic>=2.12,<3", -] - -[project.optional-dependencies] -dev = [ - "pytest>=8.0.0", - "pytest-cov>=5.0.0", -] - -[tool.pytest.ini_options] -addopts = "-q" From 51611f94acb02a1caade15fdb3799c29a8aed38c Mon Sep 17 00:00:00 2001 From: KoeppelSoftwareEngineer Date: Wed, 28 Jan 2026 10:53:43 +0100 Subject: [PATCH 09/23] Update jflat.py --- src/jflat.py | 141 +++++++++--------- .../playground_simpleWorkingExamle.py | 102 +++++++++++++ 2 files changed, 176 insertions(+), 67 deletions(-) create mode 100644 tests/test_src/playground_simpleWorkingExamle.py diff --git a/src/jflat.py b/src/jflat.py index 51de863..700152c 100644 --- a/src/jflat.py +++ b/src/jflat.py @@ -1,20 +1,58 @@ """ -PROJECT JFLAT +execute : python jflat_demo.py -KURZ-GOAL -> Pydantic model → .model_dump() → nested JSON/dict → JFlat → flattened dict +PROJECT JFLAT — Unified Demo Script +----------------------------------- + +This script shows: +1. How Pydantic creates nested JSON and JSON schemas +2. How JFlat flattens nested JSON +3. A teachable prototype for students and non-programmers + +Run with: + python jflat_demo.py """ from typing import Any, Dict +from pydantic import BaseModel, Field + + +# ================================================================ +# PART 1 — PYDANTIC MODELS (your examples) +# ================================================================ + +class Example(BaseModel): + id: str = Field(..., description="The unique identifier") + + +class Person(BaseModel): + name: str = Field(..., description="The person's name") + age: int = Field(..., ge=0, description="The person's age in years") + example: Example + + +class BaseMethod(BaseModel): + author: str = Field(..., description="The author of the method") + + +class Method(BaseMethod): + method_name: str = Field(..., description="The name of the method") + person: Person +# ================================================================ +# PART 2 — JFlat: A JSON flattener +# ================================================================ + class JFlat: """ - A tiny helper class that accepts any JSON‑like dictionary - and can flatten it. + A tiny helper class that accepts a nested JSON-like dictionary + and flattens it into a single-level dictionary. - Usage: - flat = JFlat(input_json).flatten() - print(flat) + Example: + {"person": {"name": "Alice"}} + becomes: + {"person_name": "Alice"} """ def __init__(self, input_json: Dict[str, Any]): @@ -22,26 +60,12 @@ def __init__(self, input_json: Dict[str, Any]): raise ValueError("JFlat only accepts dictionaries.") self.input_json = input_json - # ------------------------------------------------------------ - # Public method - # ------------------------------------------------------------ def flatten(self) -> Dict[str, Any]: - """ - Returns a flattened dictionary. - Example: {"director": {"name": "X"}} becomes {"director_name": "X"} - """ flat_dict: Dict[str, Any] = {} self._flatten_recursive(self.input_json, parent_key="", output=flat_dict) return flat_dict - # ------------------------------------------------------------ - # Internal recursive function - # ------------------------------------------------------------ def _flatten_recursive(self, obj: Any, parent_key: str, output: Dict[str, Any]): - """ - Recursively walks through the JSON dictionary and stores - flattened key/value pairs. - """ if isinstance(obj, dict): for key, value in obj.items(): new_key = f"{parent_key}_{key}" if parent_key else key @@ -50,53 +74,36 @@ def _flatten_recursive(self, obj: Any, parent_key: str, output: Dict[str, Any]): output[parent_key] = obj -# ---------------------------------------------------------------- -# Example usage (your demo for MIT students) -# ---------------------------------------------------------------- +# ================================================================ +# PART 3 — DEMO EXECUTION +# ================================================================ if __name__ == "__main__": - # A nested JSON similar to what Pydantic .model_dump() produces - # Example with a little humor: Christopher Nolan is still 50 :-) - input_json = { - "title": "Inception", - "director": { - "name": "Christopher Nolan", - "age": 50 - } - } - - print("\nINPUT JSON (nested):") - print(input_json) - - # flatten it - jflat = JFlat(input_json) - output = jflat.flatten() - - print("\nOUTPUT JSON (flattened):") - print(output) - - # Expected output: - # { - # "title": "Inception", - # "director_name": "Christopher Nolan", - # "director_age": 50 - # } + print("\n=== Pydantic -> JSON SCHEMA ===") + schema = Method.model_json_schema() + print(schema) + + print("\n=== Nested JSON produced by Pydantic ===") + method_instance = Method( + author="Maxim", + method_name="FlattenSchema", + person=Person( + name="Christopher Nolan", + age=50, + example=Example(id="XYZ123") + ) + ) + + nested_json = method_instance.model_dump() + print(nested_json) + + print("\n=== Flattened using JFlat ===") + flat = JFlat(nested_json).flatten() + print(flat) + + + + + -""" -INPUT: -{ - "pizza": { - "toppings": { - "cheese": "mozzarella", - "extra": "pineapple (controversial!)" - } - } -} - -OUPUT: -{ - "pizza_toppings_cheese": "mozzarella", - "pizza_toppings_extra": "pineapple (controversial!)" -} -""" diff --git a/tests/test_src/playground_simpleWorkingExamle.py b/tests/test_src/playground_simpleWorkingExamle.py new file mode 100644 index 0000000..51de863 --- /dev/null +++ b/tests/test_src/playground_simpleWorkingExamle.py @@ -0,0 +1,102 @@ +""" +PROJECT JFLAT + +KURZ-GOAL -> Pydantic model → .model_dump() → nested JSON/dict → JFlat → flattened dict +""" + +from typing import Any, Dict + + +class JFlat: + """ + A tiny helper class that accepts any JSON‑like dictionary + and can flatten it. + + Usage: + flat = JFlat(input_json).flatten() + print(flat) + """ + + def __init__(self, input_json: Dict[str, Any]): + if not isinstance(input_json, dict): + raise ValueError("JFlat only accepts dictionaries.") + self.input_json = input_json + + # ------------------------------------------------------------ + # Public method + # ------------------------------------------------------------ + def flatten(self) -> Dict[str, Any]: + """ + Returns a flattened dictionary. + Example: {"director": {"name": "X"}} becomes {"director_name": "X"} + """ + flat_dict: Dict[str, Any] = {} + self._flatten_recursive(self.input_json, parent_key="", output=flat_dict) + return flat_dict + + # ------------------------------------------------------------ + # Internal recursive function + # ------------------------------------------------------------ + def _flatten_recursive(self, obj: Any, parent_key: str, output: Dict[str, Any]): + """ + Recursively walks through the JSON dictionary and stores + flattened key/value pairs. + """ + if isinstance(obj, dict): + for key, value in obj.items(): + new_key = f"{parent_key}_{key}" if parent_key else key + self._flatten_recursive(value, new_key, output) + else: + output[parent_key] = obj + + +# ---------------------------------------------------------------- +# Example usage (your demo for MIT students) +# ---------------------------------------------------------------- +if __name__ == "__main__": + # A nested JSON similar to what Pydantic .model_dump() produces + # Example with a little humor: Christopher Nolan is still 50 :-) + input_json = { + "title": "Inception", + "director": { + "name": "Christopher Nolan", + "age": 50 + } + } + + print("\nINPUT JSON (nested):") + print(input_json) + + # flatten it + jflat = JFlat(input_json) + output = jflat.flatten() + + print("\nOUTPUT JSON (flattened):") + print(output) + + # Expected output: + # { + # "title": "Inception", + # "director_name": "Christopher Nolan", + # "director_age": 50 + # } + + +""" +INPUT: +{ + "pizza": { + "toppings": { + "cheese": "mozzarella", + "extra": "pineapple (controversial!)" + } + } +} + +OUPUT: +{ + "pizza_toppings_cheese": "mozzarella", + "pizza_toppings_extra": "pineapple (controversial!)" +} +""" + From 834087f64c5eff10ffbdabda4399b855ef7f1ac7 Mon Sep 17 00:00:00 2001 From: KoeppelSoftwareEngineer Date: Wed, 28 Jan 2026 11:59:13 +0100 Subject: [PATCH 10/23] Cleaning up Code --- tests/test_src/playground_jflat_demo.py | 109 ------------------ .../playground_simpleWorkingExamle.py | 102 ---------------- 2 files changed, 211 deletions(-) delete mode 100644 tests/test_src/playground_jflat_demo.py delete mode 100644 tests/test_src/playground_simpleWorkingExamle.py diff --git a/tests/test_src/playground_jflat_demo.py b/tests/test_src/playground_jflat_demo.py deleted file mode 100644 index 700152c..0000000 --- a/tests/test_src/playground_jflat_demo.py +++ /dev/null @@ -1,109 +0,0 @@ -""" -execute : python jflat_demo.py - -PROJECT JFLAT — Unified Demo Script ------------------------------------ - -This script shows: -1. How Pydantic creates nested JSON and JSON schemas -2. How JFlat flattens nested JSON -3. A teachable prototype for students and non-programmers - -Run with: - python jflat_demo.py -""" - -from typing import Any, Dict -from pydantic import BaseModel, Field - - -# ================================================================ -# PART 1 — PYDANTIC MODELS (your examples) -# ================================================================ - -class Example(BaseModel): - id: str = Field(..., description="The unique identifier") - - -class Person(BaseModel): - name: str = Field(..., description="The person's name") - age: int = Field(..., ge=0, description="The person's age in years") - example: Example - - -class BaseMethod(BaseModel): - author: str = Field(..., description="The author of the method") - - -class Method(BaseMethod): - method_name: str = Field(..., description="The name of the method") - person: Person - - -# ================================================================ -# PART 2 — JFlat: A JSON flattener -# ================================================================ - -class JFlat: - """ - A tiny helper class that accepts a nested JSON-like dictionary - and flattens it into a single-level dictionary. - - Example: - {"person": {"name": "Alice"}} - becomes: - {"person_name": "Alice"} - """ - - def __init__(self, input_json: Dict[str, Any]): - if not isinstance(input_json, dict): - raise ValueError("JFlat only accepts dictionaries.") - self.input_json = input_json - - def flatten(self) -> Dict[str, Any]: - flat_dict: Dict[str, Any] = {} - self._flatten_recursive(self.input_json, parent_key="", output=flat_dict) - return flat_dict - - def _flatten_recursive(self, obj: Any, parent_key: str, output: Dict[str, Any]): - if isinstance(obj, dict): - for key, value in obj.items(): - new_key = f"{parent_key}_{key}" if parent_key else key - self._flatten_recursive(value, new_key, output) - else: - output[parent_key] = obj - - -# ================================================================ -# PART 3 — DEMO EXECUTION -# ================================================================ -if __name__ == "__main__": - print("\n=== Pydantic -> JSON SCHEMA ===") - schema = Method.model_json_schema() - print(schema) - - print("\n=== Nested JSON produced by Pydantic ===") - method_instance = Method( - author="Maxim", - method_name="FlattenSchema", - person=Person( - name="Christopher Nolan", - age=50, - example=Example(id="XYZ123") - ) - ) - - nested_json = method_instance.model_dump() - print(nested_json) - - print("\n=== Flattened using JFlat ===") - flat = JFlat(nested_json).flatten() - print(flat) - - - - - - - - diff --git a/tests/test_src/playground_simpleWorkingExamle.py b/tests/test_src/playground_simpleWorkingExamle.py deleted file mode 100644 index 51de863..0000000 --- a/tests/test_src/playground_simpleWorkingExamle.py +++ /dev/null @@ -1,102 +0,0 @@ -""" -PROJECT JFLAT - -KURZ-GOAL -> Pydantic model → .model_dump() → nested JSON/dict → JFlat → flattened dict -""" - -from typing import Any, Dict - - -class JFlat: - """ - A tiny helper class that accepts any JSON‑like dictionary - and can flatten it. - - Usage: - flat = JFlat(input_json).flatten() - print(flat) - """ - - def __init__(self, input_json: Dict[str, Any]): - if not isinstance(input_json, dict): - raise ValueError("JFlat only accepts dictionaries.") - self.input_json = input_json - - # ------------------------------------------------------------ - # Public method - # ------------------------------------------------------------ - def flatten(self) -> Dict[str, Any]: - """ - Returns a flattened dictionary. - Example: {"director": {"name": "X"}} becomes {"director_name": "X"} - """ - flat_dict: Dict[str, Any] = {} - self._flatten_recursive(self.input_json, parent_key="", output=flat_dict) - return flat_dict - - # ------------------------------------------------------------ - # Internal recursive function - # ------------------------------------------------------------ - def _flatten_recursive(self, obj: Any, parent_key: str, output: Dict[str, Any]): - """ - Recursively walks through the JSON dictionary and stores - flattened key/value pairs. - """ - if isinstance(obj, dict): - for key, value in obj.items(): - new_key = f"{parent_key}_{key}" if parent_key else key - self._flatten_recursive(value, new_key, output) - else: - output[parent_key] = obj - - -# ---------------------------------------------------------------- -# Example usage (your demo for MIT students) -# ---------------------------------------------------------------- -if __name__ == "__main__": - # A nested JSON similar to what Pydantic .model_dump() produces - # Example with a little humor: Christopher Nolan is still 50 :-) - input_json = { - "title": "Inception", - "director": { - "name": "Christopher Nolan", - "age": 50 - } - } - - print("\nINPUT JSON (nested):") - print(input_json) - - # flatten it - jflat = JFlat(input_json) - output = jflat.flatten() - - print("\nOUTPUT JSON (flattened):") - print(output) - - # Expected output: - # { - # "title": "Inception", - # "director_name": "Christopher Nolan", - # "director_age": 50 - # } - - -""" -INPUT: -{ - "pizza": { - "toppings": { - "cheese": "mozzarella", - "extra": "pineapple (controversial!)" - } - } -} - -OUPUT: -{ - "pizza_toppings_cheese": "mozzarella", - "pizza_toppings_extra": "pineapple (controversial!)" -} -""" - From 9d4e48d22254afe9c0448fb8863a7889ae048905 Mon Sep 17 00:00:00 2001 From: KoeppelSoftwareEngineer Date: Thu, 29 Jan 2026 13:48:28 +0100 Subject: [PATCH 11/23] Update jflat from review notes --- src/examples/schema.py | 19 +++++++++ .../examples/schema_json.json | 0 src/jflat/__init__.py | 0 src/{ => jflat}/jflat.py | 42 ------------------- tests/test_jflat.py | 0 5 files changed, 19 insertions(+), 42 deletions(-) create mode 100644 src/examples/schema.py rename tests/test_src/test_jflat.py => src/examples/schema_json.json (100%) create mode 100644 src/jflat/__init__.py rename src/{ => jflat}/jflat.py (61%) create mode 100644 tests/test_jflat.py diff --git a/src/examples/schema.py b/src/examples/schema.py new file mode 100644 index 0000000..2535c46 --- /dev/null +++ b/src/examples/schema.py @@ -0,0 +1,19 @@ +from pydantic import BaseModel, Field + +class Example(BaseModel): + id: str = Field(..., description="The unique identifier") + + +class Person(BaseModel): + name: str = Field(..., description="The person's name") + age: int = Field(..., ge=0, description="The person's age in years") + example: Example + + +class BaseMethod(BaseModel): + author: str = Field(..., description="The author of the method") + + +class Method(BaseMethod): + method_name: str = Field(..., description="The name of the method") + person: Person \ No newline at end of file diff --git a/tests/test_src/test_jflat.py b/src/examples/schema_json.json similarity index 100% rename from tests/test_src/test_jflat.py rename to src/examples/schema_json.json diff --git a/src/jflat/__init__.py b/src/jflat/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/jflat.py b/src/jflat/jflat.py similarity index 61% rename from src/jflat.py rename to src/jflat/jflat.py index 700152c..90aa428 100644 --- a/src/jflat.py +++ b/src/jflat/jflat.py @@ -1,49 +1,7 @@ -""" -execute : python jflat_demo.py - -PROJECT JFLAT — Unified Demo Script ------------------------------------ - -This script shows: -1. How Pydantic creates nested JSON and JSON schemas -2. How JFlat flattens nested JSON -3. A teachable prototype for students and non-programmers - -Run with: - python jflat_demo.py -""" - from typing import Any, Dict from pydantic import BaseModel, Field -# ================================================================ -# PART 1 — PYDANTIC MODELS (your examples) -# ================================================================ - -class Example(BaseModel): - id: str = Field(..., description="The unique identifier") - - -class Person(BaseModel): - name: str = Field(..., description="The person's name") - age: int = Field(..., ge=0, description="The person's age in years") - example: Example - - -class BaseMethod(BaseModel): - author: str = Field(..., description="The author of the method") - - -class Method(BaseMethod): - method_name: str = Field(..., description="The name of the method") - person: Person - - -# ================================================================ -# PART 2 — JFlat: A JSON flattener -# ================================================================ - class JFlat: """ A tiny helper class that accepts a nested JSON-like dictionary diff --git a/tests/test_jflat.py b/tests/test_jflat.py new file mode 100644 index 0000000..e69de29 From 746b1d4376c460f1ef7bbc0e05ba4ec63ea70c81 Mon Sep 17 00:00:00 2001 From: KoeppelSoftwareEngineer Date: Thu, 29 Jan 2026 14:52:15 +0100 Subject: [PATCH 12/23] Simplify; jflat.py implement flatten_sjonn() --- src/examples/export_schema.py | 12 ++++ src/examples/schema_json.json | 65 +++++++++++++++++++ src/examples/schema_json_aiGenerated.json | 76 +++++++++++++++++++++++ src/jflat/jflat.py | 66 ++++---------------- 4 files changed, 165 insertions(+), 54 deletions(-) create mode 100644 src/examples/export_schema.py create mode 100644 src/examples/schema_json_aiGenerated.json diff --git a/src/examples/export_schema.py b/src/examples/export_schema.py new file mode 100644 index 0000000..fdf7175 --- /dev/null +++ b/src/examples/export_schema.py @@ -0,0 +1,12 @@ +from schema import Method +import json + +"""File to create a json for "Ticket - schema_json.json" with model_json_schema() +this can also be run in a terminal +""" + +schema = Method.model_json_schema() + +with open("schema_json.json", "w") as f: + json.dump(schema, f, indent=4) + \ No newline at end of file diff --git a/src/examples/schema_json.json b/src/examples/schema_json.json index e69de29..5eff27e 100644 --- a/src/examples/schema_json.json +++ b/src/examples/schema_json.json @@ -0,0 +1,65 @@ +{ + "$defs": { + "Example": { + "properties": { + "id": { + "description": "The unique identifier", + "title": "Id", + "type": "string" + } + }, + "required": [ + "id" + ], + "title": "Example", + "type": "object" + }, + "Person": { + "properties": { + "name": { + "description": "The person's name", + "title": "Name", + "type": "string" + }, + "age": { + "description": "The person's age in years", + "minimum": 0, + "title": "Age", + "type": "integer" + }, + "example": { + "$ref": "#/$defs/Example" + } + }, + "required": [ + "name", + "age", + "example" + ], + "title": "Person", + "type": "object" + } + }, + "properties": { + "author": { + "description": "The author of the method", + "title": "Author", + "type": "string" + }, + "method_name": { + "description": "The name of the method", + "title": "Method Name", + "type": "string" + }, + "person": { + "$ref": "#/$defs/Person" + } + }, + "required": [ + "author", + "method_name", + "person" + ], + "title": "Method", + "type": "object" +} \ No newline at end of file diff --git a/src/examples/schema_json_aiGenerated.json b/src/examples/schema_json_aiGenerated.json new file mode 100644 index 0000000..689e8ee --- /dev/null +++ b/src/examples/schema_json_aiGenerated.json @@ -0,0 +1,76 @@ + +{ + "title": "Method", + "type": "object", + "properties": { + "author": { + "title": "Author", + "description": "The author of the method", + "type": "string" + }, + "method_name": { + "title": "Method Name", + "description": "The name of the method", + "type": "string" + }, + "person": { + "title": "Person", + "allOf": [ + { + "$ref": "#/$defs/Person" + } + ] + } + }, + "required": [ + "author", + "method_name", + "person" + ], + "$defs": { + "Example": { + "title": "Example", + "type": "object", + "properties": { + "id": { + "title": "Id", + "description": "The unique identifier", + "type": "string" + } + }, + "required": [ + "id" + ] + }, + "Person": { + "title": "Person", + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "The person's name", + "type": "string" + }, + "age": { + "title": "Age", + "description": "The person's age in years", + "type": "integer", + "minimum": 0 + }, + "example": { + "title": "Example", + "allOf": [ + { + "$ref": "#/$defs/Example" + } + ] + } + }, + "required": [ + "name", + "age", + "example" + ] + } + } +} diff --git a/src/jflat/jflat.py b/src/jflat/jflat.py index 90aa428..a8dc1e3 100644 --- a/src/jflat/jflat.py +++ b/src/jflat/jflat.py @@ -1,67 +1,25 @@ from typing import Any, Dict -from pydantic import BaseModel, Field -class JFlat: +def flatten_json(data: Dict[str, Any], parent_key: str = "") -> Dict[str, Any]: """ - A tiny helper class that accepts a nested JSON-like dictionary - and flattens it into a single-level dictionary. + Flatten a nested JSON-like dictionary into a flat dictionary + using underscore-separated keys. Example: {"person": {"name": "Alice"}} - becomes: + becomes: {"person_name": "Alice"} """ + result: Dict[str, Any] = {} - def __init__(self, input_json: Dict[str, Any]): - if not isinstance(input_json, dict): - raise ValueError("JFlat only accepts dictionaries.") - self.input_json = input_json + for key, value in data.items(): + new_key = f"{parent_key}_{key}" if parent_key else key - def flatten(self) -> Dict[str, Any]: - flat_dict: Dict[str, Any] = {} - self._flatten_recursive(self.input_json, parent_key="", output=flat_dict) - return flat_dict - - def _flatten_recursive(self, obj: Any, parent_key: str, output: Dict[str, Any]): - if isinstance(obj, dict): - for key, value in obj.items(): - new_key = f"{parent_key}_{key}" if parent_key else key - self._flatten_recursive(value, new_key, output) + if isinstance(value, dict): + # Recursively flatten child dictionaries + result.update(flatten_json(value, new_key)) else: - output[parent_key] = obj - - -# ================================================================ -# PART 3 — DEMO EXECUTION -# ================================================================ -if __name__ == "__main__": - print("\n=== Pydantic -> JSON SCHEMA ===") - schema = Method.model_json_schema() - print(schema) - - print("\n=== Nested JSON produced by Pydantic ===") - method_instance = Method( - author="Maxim", - method_name="FlattenSchema", - person=Person( - name="Christopher Nolan", - age=50, - example=Example(id="XYZ123") - ) - ) - - nested_json = method_instance.model_dump() - print(nested_json) - - print("\n=== Flattened using JFlat ===") - flat = JFlat(nested_json).flatten() - print(flat) - - - - - - - + result[new_key] = value + return result \ No newline at end of file From 329b94d12e5101b2df7f7228a7b9c77791368072 Mon Sep 17 00:00:00 2001 From: KoeppelSoftwareEngineer Date: Thu, 29 Jan 2026 15:33:13 +0100 Subject: [PATCH 13/23] Refactor struture: move models to examples, simplified -> now flatten_sjon() --- src/jflat/__init__.py | 4 ++++ tests/test_jflat.py | 25 +++++++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/src/jflat/__init__.py b/src/jflat/__init__.py index e69de29..a4f3a96 100644 --- a/src/jflat/__init__.py +++ b/src/jflat/__init__.py @@ -0,0 +1,4 @@ + +from .jflat import flatten_json + +__all__ = ["flatten_json"] diff --git a/tests/test_jflat.py b/tests/test_jflat.py index e69de29..a1aebdb 100644 --- a/tests/test_jflat.py +++ b/tests/test_jflat.py @@ -0,0 +1,25 @@ + +from jflat.jflat import flatten_json + + +def test_flatten_json_simple(): + data = {"a": 1, "b": 2} + assert flatten_json(data) == {"a": 1, "b": 2} + + +def test_flatten_json_nested(): + data = { + "person": { + "name": "Alice", + "info": { + "age": 30 + } + } + } + + flattened = flatten_json(data) + + assert flattened == { + "person_name": "Alice", + "person_info_age": 30 + } From 8d3a77f5462bca59bc12d01ddd28216c56e7b525 Mon Sep 17 00:00:00 2001 From: KoeppelSoftwareEngineer Date: Thu, 29 Jan 2026 16:04:18 +0100 Subject: [PATCH 14/23] Update pyproject.toml --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index e69de29..b28b04f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -0,0 +1,3 @@ + + + From 2a7fba0b3b7dc927c8bad593a398c3971b1f2628 Mon Sep 17 00:00:00 2001 From: KoeppelSoftwareEngineer Date: Thu, 29 Jan 2026 16:06:06 +0100 Subject: [PATCH 15/23] pyproject.toml is created after BluePrint from Bam-Master --- pyproject.toml | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index b28b04f..0f83d57 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,33 @@ +[build-system] +requires = ["setuptools>=61.0.0", "wheel"] +build-backend = "setuptools.build_meta" +[project] +name = "jflat" +version = "0.1.0" +description = "Utility functions to flatten nested JSON into a flat dictionary." +readme = "README.md" +authors = [ + { name = "Maxim Köppel" } +] +requires-python = ">=3.10" + +dependencies = [ + "pydantic>=2.0" +] + +classifiers = [ + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", +] + +[project.urls] +repository = "https://github.com/BAMResearch/jflat" +homepage = "https://github.com/BAMResearch/jflat" + +[tool.pytest.ini_options] +pythonpath = [ + "src" +] From edf1c6fb1e710c5be0f40197c2f9d04a6ee0d88a Mon Sep 17 00:00:00 2001 From: KoeppelSoftwareEngineer Date: Thu, 29 Jan 2026 16:12:11 +0100 Subject: [PATCH 16/23] Update README.md with clearer project information --- README.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/README.md b/README.md index a9b5b86..3c57412 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,27 @@ +*** # jflat Utility functions to transform nested JSON schemas into a flat list of dictionaries. + +*** +# jflat + +A small Python utility to transform **nested JSON objects** into a **flat dictionary** with underscore‐separated keys. + +This project demonstrates: +- A clean `src/`-based Python package layout +- A minimal Pydantic schema example +- Conversion from nested Pydantic model output to a flat JSON using `flatten_json` +- Example schema JSON generation (`examples/schema_json.json`) +- Pytest-based unit tests + +--- + +## 📦 Installation (development mode) + +Clone the repository and install in editable mode: + +```bash +pip install -e . +``` + +*** From b72db6ec55c9264a6a17c1333bb4d4c9c96f949d Mon Sep 17 00:00:00 2001 From: KoeppelSoftwareEngineer Date: Fri, 30 Jan 2026 09:45:23 +0100 Subject: [PATCH 17/23] Comments are Updated --- src/examples/export_schema.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/examples/export_schema.py b/src/examples/export_schema.py index fdf7175..ca666cb 100644 --- a/src/examples/export_schema.py +++ b/src/examples/export_schema.py @@ -1,8 +1,11 @@ from schema import Method import json -"""File to create a json for "Ticket - schema_json.json" with model_json_schema() -this can also be run in a terminal +""" +File to create a JSON Schema (stored in `./examples/schema_json.json`) based on the example data model defined +in `./examples/schema.py`. + +This JSON Schema is produced by calling `Method.model_json_schema()`, where `Method` is defined in `schema.py`. """ schema = Method.model_json_schema() From d7ce084369b31c76cf65a3d0cdca66e282aaab7e Mon Sep 17 00:00:00 2001 From: KoeppelSoftwareEngineer Date: Fri, 30 Jan 2026 09:47:15 +0100 Subject: [PATCH 18/23] indent from 4 to 2 -> indent=2 --- src/examples/export_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/examples/export_schema.py b/src/examples/export_schema.py index ca666cb..92c0f2b 100644 --- a/src/examples/export_schema.py +++ b/src/examples/export_schema.py @@ -11,5 +11,5 @@ schema = Method.model_json_schema() with open("schema_json.json", "w") as f: - json.dump(schema, f, indent=4) + json.dump(schema, f, indent=2) \ No newline at end of file From bbb2e067d070ae9883d353463ee2178cf307a252 Mon Sep 17 00:00:00 2001 From: KoeppelSoftwareEngineer Date: Fri, 30 Jan 2026 10:05:10 +0100 Subject: [PATCH 19/23] added ToDo in schema.py - NOT implemented jet --- src/examples/schema.py | 51 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/src/examples/schema.py b/src/examples/schema.py index 2535c46..31ecad4 100644 --- a/src/examples/schema.py +++ b/src/examples/schema.py @@ -16,4 +16,53 @@ class BaseMethod(BaseModel): class Method(BaseMethod): method_name: str = Field(..., description="The name of the method") - person: Person \ No newline at end of file + person: Person + + +#ToDo Maybe you can define more types in this example? Something like str | none, and create more different Fields (so the printed JSON schema is actually richer and we can cover more cases) + + +""" +src/jflat/jflat.py def flatten_json(data: Dict[str, Any], parent_key: str = "") -> Dict[str, Any]: + +The goal of this function is more to map a JSON Schema (as printed by pydantic) to our new flattened version. The end result should look something like: + +{ + "$defs": { + "Method": { + "properties": { + "author": { + "description": "The author of the method", + "type": "string" + }, + "method_name": { + "description": "The name of the method", + "type": "string" + }, + "person": { + "$ref": "#/$defs/Person" + } + }, + "description": "......", + "$inherits_from": "#/$defs/BaseMethod", + }, + +}} +As you can see, I slightly modified the resulting JSON schema when printed using model_json_schema. The idea is to get rid off unnecessary stuff and adding some other info. I: + +Deleted title in each property +Deleted title in each object +Added an $inherits_from key in each of the objects dictionaries +Moved the Method defs inside $defs (before, it is outside because we are printing from it) +We need to add BaseMethod to define inheritances +Deleted all the required and type:object stuff +We could also: + +Add a key inside each property defining if they are mandatory or optional (this was before defined by required. Somethind like: + "author": { + "description": "The author of the method", + "type": "string", + "mandatory": true # this can also be false, if the property is optional (i.e., str | None) + }, + +""" \ No newline at end of file From 8c3a1b2263732ff932d02986d0bc04cf2bb637f3 Mon Sep 17 00:00:00 2001 From: KoeppelSoftwareEngineer Date: Fri, 30 Jan 2026 10:14:20 +0100 Subject: [PATCH 20/23] Starting with update of pyproject.toml --- pyproject.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 0f83d57..4ce6469 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,3 +31,7 @@ pythonpath = [ ] + + + + From c1eeb9bbe90d94276e8d7503388c4e39a52c74f7 Mon Sep 17 00:00:00 2001 From: KoeppelSoftwareEngineer Date: Fri, 30 Jan 2026 10:24:20 +0100 Subject: [PATCH 21/23] pyproject.toml configuration/setup src --- pyproject.toml | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4ce6469..d75de63 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,10 +25,15 @@ classifiers = [ repository = "https://github.com/BAMResearch/jflat" homepage = "https://github.com/BAMResearch/jflat" -[tool.pytest.ini_options] -pythonpath = [ - "src" -] + +[tool.setuptools] +package-dir = { "" = "src" } + +[tool.setuptools.packages.find] +where = ["src"] + +[tool.setuptools_scm] +write_to = "src/jflat/_version.py" From eca8c557f8abbcb08de5dd11423d94dc83d700e3 Mon Sep 17 00:00:00 2001 From: KoeppelSoftwareEngineer Date: Fri, 30 Jan 2026 10:29:44 +0100 Subject: [PATCH 22/23] in .gitignore add: src/jflat/_version.py ; so the _version.py is NOT be pushed --- .gitignore | 5 +++++ pyproject.toml | 14 ++++++-------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 0e5701b..62198fd 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,11 @@ .MyNotes/ .MyNotes/* + +# Generated by setuptools-scm (do not commit) +src/jflat/_version.py + + # Byte-compiled / optimized / DLL files __pycache__/ *.py[codz] diff --git a/pyproject.toml b/pyproject.toml index d75de63..d925e43 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,10 @@ [build-system] -requires = ["setuptools>=61.0.0", "wheel"] +requires = ["setuptools>=61.0.0", "wheel", "setuptools-scm"] build-backend = "setuptools.build_meta" [project] name = "jflat" -version = "0.1.0" +dynamic = ["version"] description = "Utility functions to flatten nested JSON into a flat dictionary." readme = "README.md" authors = [ @@ -25,7 +25,6 @@ classifiers = [ repository = "https://github.com/BAMResearch/jflat" homepage = "https://github.com/BAMResearch/jflat" - [tool.setuptools] package-dir = { "" = "src" } @@ -35,8 +34,7 @@ where = ["src"] [tool.setuptools_scm] write_to = "src/jflat/_version.py" - - - - - +[tool.pytest.ini_options] +pythonpath = [ + "src" +] \ No newline at end of file From 78d37bb3fbc30d6b5a67abd2a511a149d182a2b7 Mon Sep 17 00:00:00 2001 From: KoeppelSoftwareEngineer Date: Fri, 30 Jan 2026 13:10:32 +0100 Subject: [PATCH 23/23] implementing: flatten_json_schema --- src/jflat/jflat.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/jflat/jflat.py b/src/jflat/jflat.py index a8dc1e3..424a07e 100644 --- a/src/jflat/jflat.py +++ b/src/jflat/jflat.py @@ -22,4 +22,10 @@ def flatten_json(data: Dict[str, Any], parent_key: str = "") -> Dict[str, Any]: else: result[new_key] = value - return result \ No newline at end of file + return result + + + + +def flatten_json_schema(): + pass \ No newline at end of file