From a8f8e6c133a86a237956610fc31be0e64c2acda9 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Thu, 2 Apr 2026 08:09:01 -0700 Subject: [PATCH] some helpful claude sweeps --- .claude/accuracy-sweep-state.json | 9 ++ .claude/commands/accuracy-sweep.md | 158 +++++++++++++++++++++++++++ .claude/performance-sweep-state.json | 47 ++++++++ 3 files changed, 214 insertions(+) create mode 100644 .claude/accuracy-sweep-state.json create mode 100644 .claude/commands/accuracy-sweep.md create mode 100644 .claude/performance-sweep-state.json diff --git a/.claude/accuracy-sweep-state.json b/.claude/accuracy-sweep-state.json new file mode 100644 index 00000000..f4b62063 --- /dev/null +++ b/.claude/accuracy-sweep-state.json @@ -0,0 +1,9 @@ +{ + "inspections": { + "zonal": { "last_inspected": "2026-03-30T12:00:00Z", "issue": 1090 }, + "focal": { "last_inspected": "2026-03-30T13:00:00Z", "issue": 1092 }, + "multispectral": { "last_inspected": "2026-03-30T14:00:00Z", "issue": 1094 }, + "proximity": { "last_inspected": "2026-03-30T15:00:00Z", "issue": null, "notes": "Direction >= boundary fragile but works due to truncated constant. Float32 truncation is design choice. No wrong-results bugs found." }, + "curvature": { "last_inspected": "2026-03-30T15:00:00Z", "issue": null, "notes": "Formula matches ArcGIS reference. Backends consistent. No issues found." } + } +} diff --git a/.claude/commands/accuracy-sweep.md b/.claude/commands/accuracy-sweep.md new file mode 100644 index 00000000..a28e9cc3 --- /dev/null +++ b/.claude/commands/accuracy-sweep.md @@ -0,0 +1,158 @@ +# Accuracy Sweep: Generate a Ralph Loop targeting under-inspected modules + +Analyze xrspatial modules by recency and inspection history, then print a +ready-to-run `/ralph-loop` command that targets the highest-priority modules. + +Optional arguments: $ARGUMENTS +(e.g. 
`--top 5`, `--exclude slope,aspect`, `--only-terrain`, `--reset-state`) + +--- + +## Step 1 -- Gather module metadata via git + +For every `.py` file directly under `xrspatial/` (skip `__init__.py`, +`_version.py`, `__main__.py`, `utils.py`, `accessor.py`, `preview.py`, +`dataset_support.py`, `diagnostics.py`, `analytics.py`), collect: + +| Field | How | +|-------|-----| +| **last_modified** | `git log -1 --format=%aI -- xrspatial/<module>.py` | +| **first_commit** | `git log --diff-filter=A --format=%aI -- xrspatial/<module>.py` | +| **total_commits** | `git log --oneline -- xrspatial/<module>.py \| wc -l` | +| **recent_accuracy_commits** | `git log --oneline --grep=accuracy --grep=precision --grep=numerical --grep=geodesic -- xrspatial/<module>.py` | + +Store results in a temporary variable -- do NOT write intermediate files. + +## Step 2 -- Load inspection state + +Read the state file at `.claude/accuracy-sweep-state.json`. + +If it does not exist, treat every module as never-inspected. + +If `$ARGUMENTS` contains `--reset-state`, delete the file and treat +everything as never-inspected. + +The state file schema: + +```json +{ + "inspections": { + "slope": { "last_inspected": "2026-03-28T14:00:00Z", "issue": 1042 }, + "aspect": { "last_inspected": "2026-03-28T15:30:00Z", "issue": 1043 } + } +} +``` + +## Step 3 -- Score each module + +Compute a priority score for each module. Higher = more urgent. 
+ +``` +days_since_inspected = (today - last_inspected).days # 9999 if never inspected +days_since_modified = (today - last_modified).days +total_commits = from Step 1 +has_recent_accuracy_work = 1 if recent_accuracy_commits is non-empty, else 0 + +score = (days_since_inspected * 3) + + (total_commits * 0.5) + - (days_since_modified * 0.2) + - (has_recent_accuracy_work * 500) +``` + +Rationale: +- Modules never inspected dominate (9999 * 3) +- More commits = more complex = more likely to have bugs +- Recently modified modules rank slightly higher (the 0.2/day penalty grows with time since the last change) +- Modules with existing accuracy work heavily deprioritized + +## Step 4 -- Apply filters from $ARGUMENTS + +- `--top N` -- only include the top N modules (default: 5) +- `--exclude mod1,mod2` -- remove named modules from the list +- `--only-terrain` -- restrict to slope, aspect, curvature, terrain, + terrain_metrics, hillshade, sky_view_factor +- `--only-focal` -- restrict to focal, convolution, morphology, bilateral, + edge_detection, glcm +- `--only-hydro` -- restrict to flood, cost_distance, geodesic, + surface_distance, viewshed, erosion, diffusion + +## Step 5 -- Print the results + +### 5a. Print the ranked table + +Print a markdown table showing ALL scored modules (not just the selected ones), +sorted by score descending: + +``` +| Rank | Module | Score | Last Inspected | Last Modified | Commits | +|------|-----------------|--------|----------------|---------------|---------| +| 1 | viewshed | 30012 | never | 45 days ago | 23 | +| 2 | flood | 29998 | never | 120 days ago | 18 | +| ... | ... | ... | ... | ... | ... | +``` + +### 5b. Print the generated ralph-loop command + +Using the top N modules from the ranked list, generate and print a command +like this (adapt the module list to actual results): + +```` +/ralph-loop "Survey xarray-spatial modules for numerical accuracy issues. + +**Target these modules in priority order:** +1. 
viewshed (xrspatial/viewshed.py) -- never inspected, 23 commits +2. flood (xrspatial/flood.py) -- never inspected, 18 commits +3. focal (xrspatial/focal.py) -- never inspected, 31 commits +4. erosion (xrspatial/erosion.py) -- never inspected, 12 commits +5. classify (xrspatial/classify.py) -- never inspected, 9 commits + +**For each module, in order:** +1. Read the source and identify potential accuracy issues: + - Floating point precision loss + - Incorrect NaN propagation + - Off-by-one errors in neighborhood operations + - Missing or wrong Earth curvature corrections + - Backend inconsistencies (numpy vs cupy vs dask results differ) +2. Run /rockout to fix the issue end-to-end (issue, worktree, fix, tests, docs) +3. After completing rockout for ONE module, output ITERATION DONE + +If you find no accuracy issues in the current target module, skip it and move +to the next one. + +If all target modules have been addressed or have no issues, output +ALL ACCURACY ISSUES FIXED." --max-iterations {N} --completion-promise "ALL ACCURACY ISSUES FIXED" +```` + +Set `--max-iterations` to the number of target modules + 2 (buffer for retries). + +### 5c. Print a reminder + +``` +To run this sweep: copy the command above and paste it. +To update state after a manual rockout: edit .claude/accuracy-sweep-state.json +To reset all tracking: /accuracy-sweep --reset-state +``` + +## Step 6 -- Update state (ONLY when called from inside a ralph-loop) + +This step is informational. The accuracy-sweep command itself does NOT update +the state file. State is updated when `/rockout` completes -- the rockout +workflow should append to `.claude/accuracy-sweep-state.json` after creating +the issue. 
+ +To enable this, print a note reminding the user that after each rockout +iteration completes, they can manually record the inspection by adding an +entry under `"inspections"` in `.claude/accuracy-sweep-state.json` (replace the example values): + +```json +{ "module_name": { "last_inspected": "2026-03-30T12:00:00Z", "issue": 1090 } } +``` + +--- + +## General Rules + +- Do NOT modify any source files. This command is read-only analysis. +- Do NOT create GitHub issues. This command only generates the ralph-loop command. +- Keep the output concise -- the table and command are the deliverables. +- If $ARGUMENTS is empty, use defaults: top 5, no category filter, no exclusions. diff --git a/.claude/performance-sweep-state.json b/.claude/performance-sweep-state.json new file mode 100644 index 00000000..fd74d98f --- /dev/null +++ b/.claude/performance-sweep-state.json @@ -0,0 +1,47 @@ +{ + "last_triage": "2026-03-31T18:00:00Z", + "modules": { + "reproject": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "RISKY", "bottleneck": "compute-bound", "high_count": 1, "issue": null }, + "geotiff": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "SAFE", "bottleneck": "IO-bound", "high_count": 0, "issue": null, "notes": "False positive. open_geotiff(chunks=N) returns lazy dask array. to_geotiff auto-routes dask inputs to write_streaming. Eager paths are by design for numpy/cupy." }, + "zonal": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "WILL OOM", "bottleneck": "memory-bound", "high_count": 4, "issue": 1110, "notes": "Memory guards improved, iterrows replaced with isin. da.unique().compute() confirmed safe (small result). regions() is inherently global - documented limitation." 
}, + "viewshed": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "SAFE", "bottleneck": "memory-bound", "high_count": 1, "issue": null }, + "rasterize": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "SAFE", "bottleneck": "graph-bound", "high_count": 1, "issue": null }, + "bump": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "WILL OOM", "bottleneck": "memory-bound", "high_count": 0, "issue": null }, + "normalize": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "SAFE", "bottleneck": "compute-bound", "high_count": 0, "issue": 1124, "notes": "Boolean indexing replaced with lazy nanmin/nanmax/nanmean/nanstd." }, + "bilateral": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "SAFE", "bottleneck": "compute-bound", "high_count": 0, "issue": null }, + "diffusion": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "WILL OOM", "bottleneck": "memory-bound", "high_count": 2, "issue": 1116, "notes": "Scalar diffusivity now passed as float to chunks. DataArray diffusivity passed as dask array via map_overlap." }, + "cost_distance": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "WILL OOM", "bottleneck": "memory-bound", "high_count": 2, "issue": 1118, "notes": "Memory guard added + da.block assembly. Finite max_cost path (map_overlap) was already safe." 
}, + "sky_view_factor": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "SAFE", "bottleneck": "compute-bound", "high_count": 0, "issue": null }, + "worley": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "SAFE", "bottleneck": "compute-bound", "high_count": 0, "issue": null }, + "flood": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "SAFE", "bottleneck": "compute-bound", "high_count": 0, "issue": null }, + "aspect": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "SAFE", "bottleneck": "compute-bound", "high_count": 0, "issue": 1122, "notes": "northness/eastness now use da.cos/sin on dask arrays." }, + "terrain": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "RISKY", "bottleneck": "compute-bound", "high_count": 0, "issue": null }, + "terrain_metrics": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "SAFE", "bottleneck": "memory-bound", "high_count": 0, "issue": null }, + "slope": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "SAFE", "bottleneck": "compute-bound", "high_count": 0, "issue": null }, + "perlin": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "WILL OOM", "bottleneck": "memory-bound", "high_count": 0, "issue": null }, + "curvature": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "SAFE", "bottleneck": "compute-bound", "high_count": 0, "issue": null }, + "hillshade": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "SAFE", "bottleneck": "compute-bound", "high_count": 0, "issue": null }, + "contour": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "SAFE", "bottleneck": "compute-bound", "high_count": 0, "issue": null }, + "pathfinding": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "SAFE", "bottleneck": "compute-bound", "high_count": 1, "issue": null }, + "erosion": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "WILL OOM", "bottleneck": "memory-bound", "high_count": 2, "issue": 1120, "notes": "Memory guard 
added. Algorithm inherently global." }, + "geodesic": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "N/A", "bottleneck": "compute-bound", "high_count": 0, "issue": null }, + "balanced_allocation": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "WILL OOM", "bottleneck": "memory-bound", "high_count": 3, "issue": 1114, "notes": "Lazy source extraction + memory guard. Algorithm is inherently O(N*size) - documented limitation." }, + "corridor": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "SAFE", "bottleneck": "compute-bound", "high_count": 0, "issue": null }, + "polygonize": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "SAFE", "bottleneck": "compute-bound", "high_count": 0, "issue": null }, + "edge_detection": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "SAFE", "bottleneck": "compute-bound", "high_count": 0, "issue": null }, + "multispectral": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "SAFE", "bottleneck": "compute-bound", "high_count": 0, "issue": null }, + "fire": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "SAFE", "bottleneck": "compute-bound", "high_count": 0, "issue": null }, + "proximity": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "WILL OOM", "bottleneck": "memory-bound", "high_count": 3, "issue": 1111, "notes": "Memory guard added to line-sweep path. KDTree path (EUCLIDEAN/MANHATTAN + scipy) already had guards. GREAT_CIRCLE unbounded path already guarded." }, + "emerging_hotspots": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "SAFE", "bottleneck": "compute-bound", "high_count": 0, "issue": null }, + "dasymetric": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "SAFE", "bottleneck": "memory-bound", "high_count": 0, "issue": 1126, "notes": "Memory guard added to validate_disaggregation. Core disaggregate uses map_blocks." 
}, + "classify": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "SAFE", "bottleneck": "compute-bound", "high_count": 0, "issue": null }, + "convolution": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "SAFE", "bottleneck": "compute-bound", "high_count": 0, "issue": null }, + "morphology": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "SAFE", "bottleneck": "compute-bound", "high_count": 0, "issue": null }, + "focal": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "SAFE", "bottleneck": "compute-bound", "high_count": 0, "issue": null }, + "surface_distance": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "SAFE", "bottleneck": "memory-bound", "high_count": 0, "issue": 1128, "notes": "Memory guard added to dd_grid allocation." }, + "mahalanobis": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "SAFE", "bottleneck": "compute-bound", "high_count": 0, "issue": null, "notes": "False positive. Numpy path materializes by design. Dask path uses lazy reductions + map_blocks." }, + "glcm": { "last_inspected": "2026-03-31T18:00:00Z", "oom_verdict": "SAFE", "bottleneck": "compute-bound", "high_count": 0, "issue": null, "notes": "Downgraded to MEDIUM. da.stack without rechunk is scheduling overhead, not OOM risk." } + } +}