diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index e6bf85b..f9ec174 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -7,4 +7,4 @@ # be identified in the format @org/team-name. Teams must have # explicit write access to the repository. In this example, # the octocats team in the octo-org organization owns all .txt files. -* @opengood-aio/owners +* @opengood-ai/owners diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8c9b4d1..93cd817 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -13,7 +13,7 @@ on: jobs: build: - uses: opengood-aio/central-workflows/.github/workflows/python-package-build.yml@main + uses: opengood-ai/central-workflows/.github/workflows/python-package-build.yml@main with: run-code-coverage: true secrets: diff --git a/.github/workflows/cancel.yml b/.github/workflows/cancel.yml index 0c98e9a..6654a82 100644 --- a/.github/workflows/cancel.yml +++ b/.github/workflows/cancel.yml @@ -3,6 +3,6 @@ on: [push] jobs: cancel: - uses: opengood-aio/central-workflows/.github/workflows/workflow-cancel.yml@main + uses: opengood-ai/central-workflows/.github/workflows/workflow-cancel.yml@main secrets: workflow-token: ${{ secrets.WORKFLOW_TOKEN }} diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index ee3051d..ad5ea9b 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -15,4 +15,4 @@ on: jobs: build: - uses: opengood-aio/central-workflows/.github/workflows/python-codeql-analyze.yml@main + uses: opengood-ai/central-workflows/.github/workflows/python-codeql-analyze.yml@main diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 149089c..f67f5db 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -10,7 +10,7 @@ on: jobs: coverage: - uses: opengood-aio/central-workflows/.github/workflows/python-code-coverage.yml@main + uses: 
opengood-ai/central-workflows/.github/workflows/python-code-coverage.yml@main with: run-code-coverage: true secrets: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 12f1f36..b0e41f6 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -15,7 +15,7 @@ on: jobs: release: - uses: opengood-aio/central-workflows/.github/workflows/python-py-pi-release.yml@main + uses: opengood-ai/central-workflows/.github/workflows/python-py-pi-release.yml@main with: git-email: ${{ vars.GIT_EMAIL }} git-user: ${{ vars.GIT_USER }} diff --git a/.github/workflows/update-license-copyright.yml b/.github/workflows/update-license-copyright.yml index cdecd95..a575000 100644 --- a/.github/workflows/update-license-copyright.yml +++ b/.github/workflows/update-license-copyright.yml @@ -7,6 +7,6 @@ on: jobs: update-license-copyright: - uses: opengood-aio/central-workflows/.github/workflows/workflow-update-license-copyright.yml@main + uses: opengood-ai/central-workflows/.github/workflows/workflow-update-license-copyright.yml@main secrets: workflow-token: ${{ secrets.WORKFLOW_TOKEN }} diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..9db2688 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,129 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +Python Machine Learning (ML) Plot is a library providing reusable functions for machine learning visualization plotting. The primary focus is on classification model visualization with decision boundaries and classified regions. 
+ +## Architecture + +### Module Structure + +- `src/opengood/py_ml_plot/` - Main package implementing ML plotting utilities + - `classification_plot.py` - Contains `setup_classification_plot()` function for creating 2D classification visualizations + - `__init__.py` - Exports public API (`setup_classification_plot`) + +### Classification Plot Architecture + +The `setup_classification_plot()` function visualizes classification models through several key steps: + +1. **Meshgrid Generation**: Creates a grid of points across the feature space using configurable padding and step sizes for each axis +2. **Feature Scaling** (optional): Applies inverse transformations via `feature_scale` lambda to handle models trained on scaled data +3. **Prediction**: Uses the `predict` lambda to generate class predictions across the entire meshgrid +4. **Visualization**: Combines filled contour plots (decision regions) with scatter plots (actual data points) + +The function is designed to work with any scikit-learn classifier by accepting prediction and feature scaling logic as lambda functions. + +## Development Commands + +### Environment Setup + +```bash +# Create and activate virtual environment +python3 -m venv .venv +source .venv/bin/activate + +# Install dependencies +python3 -m pip install matplotlib numpy pandas scikit-learn +``` + +### Testing + +```bash +# Run all tests with coverage +python -m pytest tests/ + +# Run a specific test +python -m pytest tests/py_ml_plot/test_classification_plot.py::TestClassificationPlot::test_logistic_regression_setup_classification_plot_with_shaded_regions + +# Run tests with verbose output +python -m pytest tests/ -v + +# Run tests with coverage report +python -m pytest tests/ --cov=src.opengood.py_ml_plot --cov-report=term-missing +``` + +**Note**: Some tests are marked with `@skip` decorator for long-running tests (e.g., K-NN visualization). These are intended for local verification only. 
+ +### Test Configuration + +Test configuration is in `pyproject.toml` under `[tool.pytest.ini_options]`: +- Coverage threshold: 70% +- Test markers: `slow`, `smoke`, `unit` +- Coverage report outputs to `coverage.xml` + +### Project Structure + +``` +src/opengood/py_ml_plot/ + - classification_plot.py # Core plotting function + - __init__.py # Public API exports +tests/ + - py_ml_plot/ + - test_classification_plot.py # Tests for classification plotting + - resources/ + - data.csv # Test dataset +``` + +## Key Implementation Details + +### Meshgrid Parameter Structure + +The `meshgrid` parameter controls the visualization bounds and resolution: + +```python +meshgrid = { + 0: {"min": 10, "max": 10, "step": 0.25}, # x-axis padding and step + 1: {"min": 1000, "max": 1000, "step": 0.25} # y-axis padding and step +} +``` + +- Keys `0` and `1` represent x-axis and y-axis respectively +- `min`/`max` define padding subtracted/added from data min/max values +- `step` controls the density of the meshgrid (smaller = higher resolution but slower) + +### Lambda Function Patterns + +**Feature Scaling Lambda**: +```python +feature_scale = lambda x_set, y_set: (sc.inverse_transform(x_set), y_set) +``` +- Inverts feature scaling applied during training to show original data ranges +- Returns tuple of (transformed_x, original_y) + +**Prediction Lambda**: +```python +predict = lambda x1, x2: classifier.predict( + sc.transform(np.array([x1.ravel(), x2.ravel()]).T) +).reshape(x1.shape) +``` +- Takes meshgrid coordinates (x1, x2) +- Applies feature scaling if the classifier was trained on scaled data +- Uses `ravel()` to flatten, combines into 2D array, transforms, predicts, then reshapes + +## Package Management + +- Build system: setuptools +- Python version: >=3.12 +- Package name: `opengood.py_ml_plot` +- Current version tracked in `pyproject.toml` +- Version bumping configured in `.bumpversion.toml` + +## Dependencies + +Core runtime dependencies: +- matplotlib >= 3.10.8 
(visualization) +- numpy >= 2.4.3 (numerical operations) +- pandas >= 3.0.1 (data handling) +- scikit-learn >= 1.8.0 (ML models in examples/tests) \ No newline at end of file diff --git a/LICENSE b/LICENSE index ac4c1cf..b3c6803 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2025 OpenGood +Copyright (c) 2025-2026 OpenGood Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 2f8f52e..3174aba 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,13 @@ # Python Machine Learning (ML) Plot -[![Build](https://github.com/opengood-aio/py-ml-plot/workflows/build/badge.svg)](https://github.com/opengood-aio/py-ml-plot/actions?query=workflow%3Abuild) -[![Release](https://github.com/opengood-aio/py-ml-plot/workflows/release/badge.svg)](https://github.com/opengood-aio/py-ml-plot/actions?query=workflow%3Arelease) -[![CodeQL](https://github.com/opengood-aio/py-ml-plot/actions/workflows/codeql.yml/badge.svg)](https://github.com/opengood-aio/py-ml-plot/actions/workflows/codeql.yml) -[![Codecov](https://codecov.io/gh/opengood-aio/py-ml-plot/graph/badge.svg?token=WX6Er5S6Vj)](https://codecov.io/gh/opengood-aio/py-ml-plot) -[![Release Version](https://img.shields.io/github/release/opengood-aio/py-ml-plot.svg)](https://github.com/opengood-aio/py-ml-plot/releases/latest) +[![Build](https://github.com/opengood-ai/py-ml-plot/workflows/build/badge.svg)](https://github.com/opengood-ai/py-ml-plot/actions?query=workflow%3Abuild) +[![Release](https://github.com/opengood-ai/py-ml-plot/workflows/release/badge.svg)](https://github.com/opengood-ai/py-ml-plot/actions?query=workflow%3Arelease) +[![CodeQL](https://github.com/opengood-ai/py-ml-plot/actions/workflows/codeql.yml/badge.svg)](https://github.com/opengood-ai/py-ml-plot/actions/workflows/codeql.yml) 
+[![Codecov](https://codecov.io/gh/opengood-ai/py-ml-plot/graph/badge.svg?token=WX6Er5S6Vj)](https://codecov.io/gh/opengood-ai/py-ml-plot) +[![Release Version](https://img.shields.io/github/release/opengood-ai/py-ml-plot.svg)](https://github.com/opengood-ai/py-ml-plot/releases/latest) [![PyPI](https://img.shields.io/pypi/v/opengood.py-ml-plot)](https://pypi.org/project/opengood.py-ml-plot/) ![Python](https://img.shields.io/pypi/pyversions/opengood.py-ml-plot) -[![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://raw.githubusercontent.com/opengood-aio/py-ml-plot/master/LICENSE) +[![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://raw.githubusercontent.com/opengood-ai/py-ml-plot/master/LICENSE) Modules containing reusable functions for machine learning visualization plotting @@ -98,7 +98,7 @@ plt.show() ##### Output -![Setup Classification Plot Visualization](https://raw.githubusercontent.com/opengood-aio/py-ml-plot/main/docs/images/setup-classification-plot-visualization.png) +![Setup Classification Plot Visualization](https://raw.githubusercontent.com/opengood-ai/py-ml-plot/main/docs/images/setup-classification-plot-visualization.png) --- @@ -109,8 +109,8 @@ plt.show() `feature_scale` lambda implementation logic for function `setup_classification_plot` is as follows: -* Inverse feature scaling is invoked via a featuring scaling object, such as - the `StandardScalar` object `sc` created earlier for feature scaling +* Inverse feature scaling is invoked via a feature scaling object, such as + the `StandardScaler` object `sc` created earlier for feature scaling * `x_set` and `y_set` are assigned non-feature scaled values of the matrix of features and the dependent variable * `x_set` values are inverted from their feature-scaled values in `x` @@ -150,7 +150,7 @@ as follows: * If the `feature_scale` lambda is defined, `x_set` and `y_set` are assigned non-feature scaled values of the matrix of features and the dependent variable from 
the sets using a feature scaling object, such as the - `StandardScalar` object created earlier for feature scaling + `StandardScaler` object created earlier for feature scaling * `x_set` values are inverted from their feature-scaled values in `x` * `y_set` values are not inverted and taken directly from `y` * `meshgrid` function from the NumPy library returns a tuple of coordinate @@ -187,7 +187,7 @@ as follows: * `step` parameter * Spacing between values * Value of `0.25` is added for spacing -* The prediction logic implemented in the `preodict` lambda is executed, and +* The prediction logic implemented in the `predict` lambda is executed, and the result is assigned to `y_pred`, containing the predictions * `contourf` function from the Matplotlib library is used for creating filled contour plots @@ -264,32 +264,54 @@ as follows: ## Python Virtual Environment -Create Python virtual environment: +Create and activate Python virtual environment: ```bash -cd ~/workspace/opengood-aio/py-ml-plot/.venv -python3 -m venv ~/workspace/opengood-aio/py-ml-plot/.venv +python3 -m venv .venv source .venv/bin/activate ``` -## Install Packages +## Install Dependencies + +Install the package in editable mode with all dependencies: + +```bash +python3 -m pip install -e . +``` + +Install with development dependencies (includes `pip-tools`, `pytest`, `pytest-cov`): ```bash -python3 -m pip install matplotlib -python3 -m pip install numpy -python3 -m pip install pandas -python3 -m pip install scikit-learn +python3 -m pip install -e ".[dev]" ``` -## Create Requirements File +## Generate Requirements File + +Generate a pinned `requirements.txt` file from pyproject.toml: ```bash -pip freeze > requirements.txt +pip-compile pyproject.toml -o requirements.txt ``` +This creates a `requirements.txt` with exact versions of all dependencies and their transitive dependencies. 
+ ## Run Tests +Run all tests with coverage: + ```bash python -m pytest tests/ ``` +Run tests with verbose output and coverage report: + +```bash +python -m pytest tests/ -v --cov=src.opengood.py_ml_plot --cov-report=term-missing +``` + +Run a specific test: + +```bash +python -m pytest tests/py_ml_plot/test_classification_plot.py::TestClassificationPlot::test_logistic_regression_setup_classification_plot_with_shaded_regions +``` + diff --git a/pyproject.toml b/pyproject.toml index fc6f4b3..d31fb91 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["setuptools >= 77.0.3", "wheel"] +requires = ["setuptools >= 82.0.1", "wheel"] build-backend = "setuptools.build_meta" [project] @@ -19,18 +19,26 @@ readme = "README.md" license = { file = "LICENSE" } dependencies = [ - "matplotlib >= 3.10.3", - "numpy >= 2.3.0rc1", - "pandas >= 2.2.3", - "scikit-learn >= 1.7.0rc1", + "matplotlib >= 3.10.8", + "numpy >= 2.4.3", + "pandas >= 3.0.1", + "scikit-learn >= 1.8.0", +] + +[project.optional-dependencies] +dev = [ + "pip-tools >= 7.5.3", + "pytest >= 9.0.2", + "pytest-cov >= 7.0.0", ] [project.urls] -Homepage = "https://github.com/opengood-aio/py-ml-plot" -Documentation = "https://github.com/opengood-aio/py-ml-plot" +Homepage = "https://github.com/opengood-ai/py-ml-plot" +Documentation = "https://github.com/opengood-ai/py-ml-plot" [tool.pytest.ini_options] -addopts = "tests/ -v --cov=src.opengood.py_ml_plot --cov-report=xml --cov-fail-under=70" +addopts = "-v --cov=src.opengood.py_ml_plot --cov-report=xml --cov-fail-under=70" +testpaths = ["tests"] python_files = "test_*.py" python_functions = "test_*" markers = [ diff --git a/pyvenv.cfg b/pyvenv.cfg deleted file mode 100644 index 63c34ba..0000000 --- a/pyvenv.cfg +++ /dev/null @@ -1,5 +0,0 @@ -home = /opt/homebrew/opt/python@3.13/bin -include-system-site-packages = false -version = 3.13.2 -executable = 
/opt/homebrew/Cellar/python@3.13/3.13.2/Frameworks/Python.framework/Versions/3.13/bin/python3.13 -command = /opt/homebrew/opt/python@3.13/bin/python3.13 -m venv /Users/cjaehnen/workspace/opengood-aio/py-ml-plot diff --git a/requirements.txt b/requirements.txt index 794875c..1dce071 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,46 @@ -matplotlib~=3.10.3 -numpy~=2.3.0rc1 -pandas~=2.2.3 -scikit-learn~=1.7.0rc1 +# +# This file is autogenerated by pip-compile with Python 3.13 +# by the following command: +# +# pip-compile --output-file=requirements.txt pyproject.toml +# +contourpy==1.3.3 + # via matplotlib +cycler==0.12.1 + # via matplotlib +fonttools==4.62.1 + # via matplotlib +joblib==1.5.3 + # via scikit-learn +kiwisolver==1.5.0 + # via matplotlib +matplotlib==3.10.8 + # via opengood.py_ml_plot (pyproject.toml) +numpy==2.4.3 + # via + # contourpy + # matplotlib + # opengood.py_ml_plot (pyproject.toml) + # pandas + # scikit-learn + # scipy +packaging==26.0 + # via matplotlib +pandas==3.0.1 + # via opengood.py_ml_plot (pyproject.toml) +pillow==12.1.1 + # via matplotlib +pyparsing==3.3.2 + # via matplotlib +python-dateutil==2.9.0.post0 + # via + # matplotlib + # pandas +scikit-learn==1.8.0 + # via opengood.py_ml_plot (pyproject.toml) +scipy==1.17.1 + # via scikit-learn +six==1.17.0 + # via python-dateutil +threadpoolctl==3.6.0 + # via scikit-learn diff --git a/tests/py_ml_plot/test_classification_plot.py b/tests/py_ml_plot/test_classification_plot.py index 56f20ce..3097e95 100644 --- a/tests/py_ml_plot/test_classification_plot.py +++ b/tests/py_ml_plot/test_classification_plot.py @@ -44,11 +44,9 @@ def test_logistic_regression_setup_classification_plot_with_shaded_regions(self) 1: {"min": 1000, "max": 1000, "step": 0.25}, }, feature_scale=lambda x_set, y_set: (sc.inverse_transform(x_set), y_set), - predict=lambda x1, x2: ( - classifier.predict( - sc.transform(np.array([x1.ravel(), x2.ravel()]).T) - ).reshape(x1.shape) - ), + predict=lambda x1, x2: 
classifier.predict( + sc.transform(np.array([x1.ravel(), x2.ravel()]).T) + ).reshape(x1.shape), ) assert result is True, "Classification plot should be set up correctly" @@ -107,11 +105,9 @@ def test_k_nearest_neighbor_setup_classification_plot_with_shaded_regions(self): 1: {"min": 1000, "max": 1000, "step": 0.25}, }, feature_scale=lambda x_set, y_set: (sc.inverse_transform(x_set), y_set), - predict=lambda x1, x2: ( - classifier.predict( - sc.transform(np.array([x1.ravel(), x2.ravel()]).T) - ).reshape(x1.shape) - ), + predict=lambda x1, x2: classifier.predict( + sc.transform(np.array([x1.ravel(), x2.ravel()]).T) + ).reshape(x1.shape), ) assert result is True, "Classification plot should be set up correctly"