diff --git a/.github/scripts/ruff/annotate-new-issues.sh b/.github/scripts/ruff/annotate-new-issues.sh new file mode 100755 index 0000000..1ea6b88 --- /dev/null +++ b/.github/scripts/ruff/annotate-new-issues.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash +# ============================================================================= +# Annotate NEW Issues Only +# ============================================================================= +# Generates GitHub annotations filtered to rules with positive deltas. +# Only issues from rules that INCREASED get annotated, not inherited issues. +# +# Input files (from collect-stats.sh): +# - rules_with_new_issues.txt: rule codes with positive deltas (one per line) +# - pr_ruff_output.json: all ruff issues (JSON-lines format) +# +# Environment: +# - GITHUB_WORKSPACE: checkout directory (for stripping absolute paths) +# +# Output: +# - GitHub annotations (::error file=...,line=...) printed to stdout +# ============================================================================= + +set -euo pipefail + +# Load shared utilities +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "$SCRIPT_DIR/lib.sh" + +D=".ruff-stats" + +log_section "Annotating new issues" + +# Exit silently if no new issues +if [ ! -s "$D/rules_with_new_issues.txt" ]; then + echo " ✓ No new issues to annotate" + exit 0 +fi + +# Build regex pattern from rules with new issues +# E.g., "SIM102|F401|RUF022" +PATTERN=$(paste -sd'|' "$D/rules_with_new_issues.txt") +RULE_COUNT=$(wc -l < "$D/rules_with_new_issues.txt" | tr -d ' ') + +echo " → Rules with new issues: $RULE_COUNT" +echo " → Pattern: $PATTERN" + +# Check environment +if [ -z "${GITHUB_WORKSPACE:-}" ]; then + echo " ⚠️ GITHUB_WORKSPACE not set (paths may be absolute)" +fi + +# Get workspace prefix for path stripping (with trailing slash) +WS_PREFIX=$(get_workspace_prefix) + +# Filter PR ruff output to only rules with positive deltas +# Then format as GitHub annotations +# Note: pr_ruff_output.json is JSON-lines (one object per line) +# Note: filenames are absolute, need to strip workspace prefix for GitHub +# Using -r (not -rs) for efficient line-by-line processing without slurping +# Guard: skip entries without .location (defensive, should not happen with ruff) + +# Generate annotations and count them +# Store in temp file to both output and count +TEMP_ANNOTATIONS="$D/annotations.txt" +jq -r --arg pattern "$PATTERN" --arg ws "$WS_PREFIX" ' + select(.code | test($pattern)) | + select(.location != null) | + "::error file=\(.filename | ltrimstr($ws)),line=\(.location.row),col=\(.location.column)::\(.code): \(.message)" +' "$D/pr_ruff_output.json" > "$TEMP_ANNOTATIONS" + +ANNOTATION_COUNT=$(wc -l < "$TEMP_ANNOTATIONS" | tr -d ' ') +echo " → Annotations: $ANNOTATION_COUNT" + +# Output annotations to stdout (for GitHub to pick up) +cat "$TEMP_ANNOTATIONS" diff --git a/.github/scripts/ruff/build-comment.sh b/.github/scripts/ruff/build-comment.sh new file mode 100755 index 0000000..c51c37e --- /dev/null +++ b/.github/scripts/ruff/build-comment.sh @@ -0,0 +1,312 @@ +#!/bin/bash +# ============================================================================= +# build-comment.sh - Ruff PR Comment Builder +# ============================================================================= +# Assembles the final PR comment from stats and tables. +# +# Inputs (in .ruff-stats/): +# - stats.env, category_delta_table.md, rules_delta_table.md, rules_rows_sorted.txt +# +# Environment variables (from caller): +# - FORMAT_FAILED: "true" if format check failed +# +# Outputs (in .ruff-stats/): +# - comment_body.md: Final PR comment body +# +# ============================================================================= +# COMMENT STRUCTURE & PHILOSOPHY +# ============================================================================= +# +# Title shows delta-based status: +# - Passing: "🧹 Ruff — ✅ No new issues" or "✅ 5 issues fixed" +# - Failing: "🧹 Ruff — ❌ 4 new issues" or "❌ 4 new issues, 2 fixed" +# +# Summary card (compact, scannable): +# - Lint: Shows NEW/FIXED breakdown, NOT just net delta +# - Top offender: Rule with biggest regression (if failing) +# - Format: Pass/fail status +# - Auto-fix: Percentage of NEW issues auto-fixable +# - Inherited: Count of pre-existing issues in modified code +# +# Required Actions (only when failing): +# - Numbered steps: auto-fix → format → manual fix → push +# - Steps shown conditionally based on what's needed +# - Doesn't ask you to fix inherited issues +# +# Context-aware tips: +# - Bug-related issues (F, B): Different tip than style issues +# - Emphasizes local testing before push +# +# ============================================================================= + +set -euo pipefail + +# Load shared utilities +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "$SCRIPT_DIR/lib.sh" + +D=".ruff-stats" +source "$D/stats.env" +FORMAT_FILES_CHECKED="${FORMAT_FILES_CHECKED:-0}" +FORMAT_NEW="${FORMAT_NEW:-0}" +FORMAT_FIXED="${FORMAT_FIXED:-0}" +FORMAT_INHERITED="${FORMAT_INHERITED:-0}" +DELTA_FIXABLE="${DELTA_FIXABLE:-0}" + +# Determine pass/fail status +REGRESSION_STATUS="fail" +FORMAT_STATUS="fail" +[ "$NEW_ISSUES" -eq 0 ] && REGRESSION_STATUS="pass" +[ "$FORMAT_NEW" -eq 0 ] && FORMAT_STATUS="pass" + +OVERALL_STATUS="pass" +[ "$REGRESSION_STATUS" = "fail" ] || [ "$FORMAT_STATUS" = "fail" ] && OVERALL_STATUS="fail" + +# Calculate top new issue (rule with most new issues) +# Format: type|category|abs_delta|rule|desc|main_count|pr_count|delta_str|fix_str|url +TOP_NEW="" +if [ "$NEW_ISSUES" -gt 0 ] && [ -s "$D/all_rule_rows.txt" ]; then + TOP_ROW=$(grep "^new|" "$D/all_rule_rows.txt" 2>/dev/null | sort -t'|' -k3 -rn | head -1 || true) + if [ -n "$TOP_ROW" ]; then + TOP_RULE=$(echo "$TOP_ROW" | cut -d'|' -f4) + TOP_DELTA=$(echo "$TOP_ROW" | cut -d'|' -f8) + TOP_NEW="$TOP_RULE $TOP_DELTA" + fi +fi + +# Calculate top fixed issue (rule with most fixed issues) +TOP_FIXED="" +if [ "$FIXED_ISSUES" -gt 0 ] && [ -s "$D/all_rule_rows.txt" ]; then + TOP_ROW=$(grep "^fixed|" "$D/all_rule_rows.txt" 2>/dev/null | sort -t'|' -k3 -rn | head -1 || true) + if [ -n "$TOP_ROW" ]; then + TOP_RULE=$(echo "$TOP_ROW" | cut -d'|' -f4) + TOP_DELTA=$(echo "$TOP_ROW" | cut -d'|' -f8) + TOP_FIXED="$TOP_RULE $TOP_DELTA" + fi +fi + +# ============================================================================= +# Build comment body +# ============================================================================= + +# Build title: 🧹 Ruff — [status] [what happened] +if [ "$OVERALL_STATUS" = "pass" ]; then + if [ "$FIXED_ISSUES" -gt 0 ]; then + TITLE="## 🧹 Ruff — ✅ $FIXED_ISSUES issues fixed" + else + TITLE="## 🧹 Ruff — ✅ No new issues" + fi +else + if [ "$REGRESSION_STATUS" = "fail" ]; then + if [ "$FIXED_ISSUES" -gt 0 ]; then + TITLE="## 🧹 Ruff — ❌ $NEW_ISSUES new issues, $FIXED_ISSUES fixed" + else + TITLE="## 🧹 Ruff — ❌ $NEW_ISSUES new issues" + fi + else + TITLE="## 🧹 Ruff — ❌ $FORMAT_NEW files need formatting" + fi +fi + +cat > "$D/comment_body.md" << HEADER + +$TITLE +HEADER + +# Summary card +echo "" >> "$D/comment_body.md" +echo "| Check | Result |" >> "$D/comment_body.md" +echo "|-------|--------|" >> "$D/comment_body.md" + +# Lint row with auto-fix and inherited inline +if [ "$REGRESSION_STATUS" = "pass" ]; then + if [ "$FIXED_ISSUES" -gt 0 ]; then + if [ "$INHERITED_ISSUES" -gt 0 ]; then + echo "| **Lint** | ✅ $FIXED_ISSUES fixed ($INHERITED_ISSUES inherited) |" >> "$D/comment_body.md" + else + echo "| **Lint** | ✅ $FIXED_ISSUES fixed |" >> "$D/comment_body.md" + fi + elif [ "$INHERITED_ISSUES" -gt 0 ]; then + echo "| **Lint** | ✅ Passing ($INHERITED_ISSUES inherited) |" >> "$D/comment_body.md" + else + echo "| **Lint** | ✅ Passing |" >> "$D/comment_body.md" + fi +else + # Build lint result: "N new (M auto-fixable), K inherited" + LINT_RESULT="$NEW_ISSUES new" + [ "$DELTA_FIXABLE" -gt 0 ] && LINT_RESULT="$LINT_RESULT ($DELTA_FIXABLE auto-fixable)" + [ "$INHERITED_ISSUES" -gt 0 ] && LINT_RESULT="$LINT_RESULT, $INHERITED_ISSUES inherited" + echo "| **Lint** | ❌ $LINT_RESULT |" >> "$D/comment_body.md" +fi + +[ -n "$TOP_NEW" ] && echo "| Top new | $TOP_NEW |" >> "$D/comment_body.md" +[ -n "$TOP_FIXED" ] && echo "| Top fixed | $TOP_FIXED |" >> "$D/comment_body.md" + +if [ "$FORMAT_STATUS" = "pass" ]; then + if [ "$FORMAT_FIXED" -gt 0 ]; then + echo "| **Format** | ✅ $FORMAT_FIXED fixed ($FORMAT_FILES_CHECKED files) |" >> "$D/comment_body.md" + elif [ "$FORMAT_INHERITED" -gt 0 ]; then + echo "| **Format** | ✅ Passing ($FORMAT_INHERITED inherited, $FORMAT_FILES_CHECKED files) |" >> "$D/comment_body.md" + else + echo "| **Format** | ✅ Passing ($FORMAT_FILES_CHECKED files) |" >> "$D/comment_body.md" + fi +else + if [ "$FORMAT_INHERITED" -gt 0 ]; then + echo "| **Format** | ❌ $FORMAT_NEW file(s) need formatting, $FORMAT_INHERITED inherited ($FORMAT_FILES_CHECKED checked) |" >> "$D/comment_body.md" + else + echo "| **Format** | ❌ $FORMAT_NEW file(s) need formatting ($FORMAT_FILES_CHECKED checked) |" >> "$D/comment_body.md" + fi +fi +echo "" >> "$D/comment_body.md" + +# Format files list (if format failed) +# Try format_files.txt first, fall back to format_new.txt +FORMAT_FILES_LIST="" +if [ -s "$D/format_files.txt" ]; then + FORMAT_FILES_LIST="$D/format_files.txt" +elif [ -s "$D/format_new.txt" ]; then + FORMAT_FILES_LIST="$D/format_new.txt" +fi + +if [ "$FORMAT_STATUS" = "fail" ] && [ -n "$FORMAT_FILES_LIST" ]; then + echo "" >> "$D/comment_body.md" + echo "**Files needing format:**" >> "$D/comment_body.md" + echo '```' >> "$D/comment_body.md" + cat "$FORMAT_FILES_LIST" >> "$D/comment_body.md" + echo '```' >> "$D/comment_body.md" +fi + +# High-priority alerts +if [ "$HAS_ERROR_PRONE" = "true" ]; then + echo "" >> "$D/comment_body.md" + echo "> [!IMPORTANT]" >> "$D/comment_body.md" + echo "> 🐛 **$ERROR_PRONE_RULES** — catches real bugs, not just style issues. Review carefully." >> "$D/comment_body.md" +fi + +# Required Actions section +if [ "$OVERALL_STATUS" = "fail" ]; then + echo "" >> "$D/comment_body.md" + echo "> [!IMPORTANT]" >> "$D/comment_body.md" + + STEP=1 + REMAINING=$((NEW_ISSUES - DELTA_FIXABLE)) + + # Step 1: Auto-fix (if there are auto-fixable issues or format failed) + if { [ "$REGRESSION_STATUS" = "fail" ] && [ "$DELTA_FIXABLE" -gt 0 ]; } || [ "$FORMAT_STATUS" = "fail" ]; then + echo "> $STEP. Auto-fix issues:" >> "$D/comment_body.md" + + # Build VSCode commands based on what's needed + VSCODE_CMDS="" + if [ "$FORMAT_STATUS" = "fail" ]; then + VSCODE_CMDS="\"Ruff: Format document\" + \"Ruff: Format imports\"" + fi + if [ "$REGRESSION_STATUS" = "fail" ] && [ "$DELTA_FIXABLE" -gt 0 ]; then + if [ -n "$VSCODE_CMDS" ]; then + VSCODE_CMDS="$VSCODE_CMDS + \"Ruff: Fix all auto-fixable problems\"" + else + VSCODE_CMDS="\"Ruff: Fix all auto-fixable problems\"" + fi + fi + echo "> - **VSCode:** $VSCODE_CMDS" >> "$D/comment_body.md" + + # Build CLI command based on what's needed + CLI_CMDS="" + if [ "$REGRESSION_STATUS" = "fail" ] && [ "$DELTA_FIXABLE" -gt 0 ]; then + CLI_CMDS="ruff check --fix ." + fi + if [ "$FORMAT_STATUS" = "fail" ]; then + if [ -n "$CLI_CMDS" ]; then + CLI_CMDS="$CLI_CMDS && ruff format ." + else + CLI_CMDS="ruff format ." + fi + fi + echo "> - **CLI:** \`$CLI_CMDS\`" >> "$D/comment_body.md" + + # Build bot command based on what's needed + if [ "$REGRESSION_STATUS" = "fail" ] && [ "$DELTA_FIXABLE" -gt 0 ] && [ "$FORMAT_STATUS" = "fail" ]; then + BOT_CMD="/ruff fix" + elif [ "$REGRESSION_STATUS" = "fail" ] && [ "$DELTA_FIXABLE" -gt 0 ]; then + BOT_CMD="/ruff check --fix" + else + BOT_CMD="/ruff format" + fi + echo "> - **Bot:** Comment \`$BOT_CMD\` on this PR" >> "$D/comment_body.md" + STEP=$((STEP + 1)) + fi + + # Step: Manual fixes (if there are non-auto-fixable NEW issues) + if [ "$REGRESSION_STATUS" = "fail" ] && [ "$REMAINING" -gt 0 ]; then + echo "> $STEP. Manually fix $REMAINING new issue(s) that can't be auto-fixed (see **Files changed**)" >> "$D/comment_body.md" + STEP=$((STEP + 1)) + fi + + # Step: Push + echo "> $STEP. Push" >> "$D/comment_body.md" + + # VSCode setup tips (only when failing - remediation focused) + cat >> "$D/comment_body.md" << 'EOF' + +> [!TIP] +>
+> VSCode: Set up auto-format and code actions on save +> +> **Settings UI:** Search "format on save" and "code actions on save" +> +> **settings.json:** +> ```json +> "editor.formatOnSave": true, +> "editor.codeActionsOnSave": { +> "source.fixAll.ruff": "explicit", +> "source.organizeImports.ruff": "explicit" +> } +> ``` +> +> [Full setup guide →](https://docs.astral.sh/ruff/editors/setup/#vs-code) +> +>
+EOF +fi + +# New Issues section (shown first) +if [ "$NEW_ISSUES" -gt 0 ] && [ -s "$D/new_table.md" ]; then + echo "" >> "$D/comment_body.md" + echo "## 🔴 New Issues ($NEW_ISSUES)" >> "$D/comment_body.md" + echo "" >> "$D/comment_body.md" + cat "$D/new_table.md" >> "$D/comment_body.md" +fi + +# Fixed Issues section (shown last) +if [ "$FIXED_ISSUES" -gt 0 ] && [ -s "$D/fixed_table.md" ]; then + echo "" >> "$D/comment_body.md" + echo "## 🟢 Fixed Issues ($FIXED_ISSUES)" >> "$D/comment_body.md" + echo "" >> "$D/comment_body.md" + cat "$D/fixed_table.md" >> "$D/comment_body.md" +fi + +# Bot commands reference (always shown - useful reference info) +# Uses print_bot_commands_markdown from lib.sh (single source of truth) +{ + echo "" + echo "---" + echo "" + echo "
" + echo "Bot commands for this PR" + echo "" + echo "Type \`/ruff\` or \`@ruff\` followed by a command in a PR comment:" + echo "" + print_bot_commands_markdown + echo "" + echo "**Reactions:** 👀 = processing, 🚀 = all passing, 😕 = issues remain (details in reply)" + echo "" + echo "
" +} >> "$D/comment_body.md" + +# Logs link (at very end) +if [ -n "${GITHUB_RUN_ID:-}" ] && [ -n "${GITHUB_REPOSITORY:-}" ]; then + echo "" >> "$D/comment_body.md" + echo "[View full logs →](https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID)" >> "$D/comment_body.md" +fi + +echo "=== Comment body ===" +cat "$D/comment_body.md" diff --git a/.github/scripts/ruff/build-tables.sh b/.github/scripts/ruff/build-tables.sh new file mode 100755 index 0000000..7bae693 --- /dev/null +++ b/.github/scripts/ruff/build-tables.sh @@ -0,0 +1,327 @@ +#!/bin/bash +# ============================================================================= +# build-tables.sh - Ruff Hierarchical Delta Tables Builder +# ============================================================================= +# Builds hierarchical tables with categories and rules grouped together. +# Category rows are bold with emoji, rule rows are italicized without emoji. +# +# Outputs (in .ruff-stats/): +# - new_table.md (new issues, includes Auto-fixable column) +# - fixed_table.md (fixed issues, no Auto-fixable column) +# +# ============================================================================= +# EMOJI CODING SYSTEM +# ============================================================================= +# +# Category prefixes (visual classification): +# 🐛 Pyflakes/Bugbear (F, B) - catches bugs +# 🎨 pycodestyle (E, W) - style errors/warnings +# ⚡ pyupgrade/simplify (UP, SIM) - modern syntax +# 📦 isort/Ruff (I, RUF) - import/package organization +# 📊 NumPy/Pandas (NPY, PD) - array/dataframe patterns +# +# ============================================================================= + +set -euo pipefail + +# Load shared utilities +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "$SCRIPT_DIR/lib.sh" + +D=".ruff-stats" +source "$D/stats.env" + +# ============================================================================= +# Helper: Get category display name, tool name, and description +# Format: display|tool|description +# ============================================================================= +get_category_info() { + local cat="$1" + case "$cat" in + ANN) echo "📝 $cat|flake8-annotations|Type annotation issues" ;; + F) echo "🐛 $cat|Pyflakes|Undefined names, unused imports" ;; + E) echo "🎨 $cat|pycodestyle|PEP 8 formatting violations" ;; + W) echo "🎨 $cat|pycodestyle|PEP 8 style warnings" ;; + B) echo "🐛 $cat|Bugbear|Likely bugs and design issues" ;; + UP) echo "⚡ $cat|pyupgrade|Upgrade to modern Python syntax" ;; + SIM) echo "⚡ $cat|flake8-simplify|Simplify complex expressions" ;; + RUF) echo "📦 $cat|Ruff|Ruff-specific linting rules" ;; + NPY) echo "📊 $cat|NumPy|NumPy-specific patterns" ;; + PD) echo "📊 $cat|pandas-vet|pandas-specific patterns" ;; + I) echo "📦 $cat|isort|Import order and grouping" ;; + C90) echo "📐 $cat|mccabe|Cyclomatic complexity" ;; + N) echo "📝 $cat|pep8-naming|PEP 8 naming conventions" ;; + D) echo "📝 $cat|pydocstyle|Docstring conventions" ;; + S) echo "🔒 $cat|Bandit|Security issues" ;; + T) echo "🐛 $cat|flake8-print|Print statements" ;; + ERA) echo "🧹 $cat|eradicate|Commented-out code" ;; + PL) echo "🐛 $cat|Pylint|Pylint rules" ;; + TRY) echo "🐛 $cat|tryceratops|Exception handling" ;; + FLY) echo "⚡ $cat|flynt|f-string conversion" ;; + PERF) echo "⚡ $cat|Perflint|Performance issues" ;; + FURB) echo "⚡ $cat|refurb|Code modernization" ;; + LOG) echo "📝 $cat|flake8-logging|Logging issues" ;; + G) echo "📝 $cat|flake8-logging-format|Logging format" ;; + *) echo "📋 $cat|$cat|$cat rules" ;; + esac +} + +# ============================================================================= +# Helper: Extract category prefix from rule code (F401 -> F, RUF022 -> RUF) +# ============================================================================= +get_category_from_rule() { + echo "$1" | sed 's/[0-9].*//' +} + +# ============================================================================= +# Helper: Calculate auto-fixable string for a category/rule +# Usage: get_fix_str +# +# Fix applicability (from ruff): +# Safe = Fix guaranteed to preserve semantics (apply automatically) +# Unsafe = Fix may change behavior (review before applying) +# +# Output markers: +# ✅ = All fixable issues are safe (safe to auto-apply) +# ❓ = Some/all fixable issues are unsafe (review before applying) +# ❌ = No auto-fixes available (manual fix required) +# ============================================================================= +get_fix_str() { + local key="$1" + local delta="$2" + local is_category="$3" + + local delta_fixable + if [ "$is_category" = "true" ]; then + delta_fixable=$(grep "^$key " "$D/delta_category_fixable.txt" 2>/dev/null | awk '{print $2}' || true) + else + delta_fixable=$(grep "^$key " "$D/delta_rule_fixable.txt" 2>/dev/null | awk '{print $2}' || true) + fi + delta_fixable=${delta_fixable:-0} + + if [ "$delta_fixable" -gt 0 ]; then + local SAFE_COUNT UNSAFE_COUNT + if [ "$is_category" = "true" ]; then + # Use regex to ensure prefix is followed by digit (prevents E matching ERA, etc.) + SAFE_COUNT=$(jq --arg key "$key" '[.[] | select((.code | test("^" + $key + "[0-9]")) and .fix.applicability == "safe")] | length' "$D/new_fixable_issues.json" 2>/dev/null || echo '0') + UNSAFE_COUNT=$(jq --arg key "$key" '[.[] | select((.code | test("^" + $key + "[0-9]")) and .fix.applicability == "unsafe")] | length' "$D/new_fixable_issues.json" 2>/dev/null || echo '0') + else + SAFE_COUNT=$(jq --arg key "$key" '[.[] | select(.code == $key and .fix.applicability == "safe")] | length' "$D/new_fixable_issues.json" 2>/dev/null || echo '0') + UNSAFE_COUNT=$(jq --arg key "$key" '[.[] | select(.code == $key and .fix.applicability == "unsafe")] | length' "$D/new_fixable_issues.json" 2>/dev/null || echo '0') + fi + + local FIX_LABELS="" + [ "$SAFE_COUNT" -gt 0 ] && FIX_LABELS="$SAFE_COUNT Safe" + [ "$UNSAFE_COUNT" -gt 0 ] && FIX_LABELS="${FIX_LABELS:+$FIX_LABELS, }$UNSAFE_COUNT Unsafe" + + local fix_pct=$((delta_fixable * 100 / delta)) + if [ "$SAFE_COUNT" -gt 0 ]; then + echo "$FIX_LABELS ($fix_pct%) ✅" + else + echo "$FIX_LABELS ($fix_pct%) ❓" + fi + else + echo "0 ❌" + fi +} + +# ============================================================================= +# Prepare rule counts and metadata +# ============================================================================= +log_section "Preparing rule data" + +jq_count_by_rule "$D/pr_ruff_output.json" > "$D/pr_all_rules.txt" 2>/dev/null || touch "$D/pr_all_rules.txt" +jq_count_by_rule "$D/main_ruff_output.json" > "$D/main_rule_counts.txt" 2>/dev/null || touch "$D/main_rule_counts.txt" +# Note: new_fixable_issues.json is a JSON array (not JSON-lines), so use -r not -rs +jq -r '[.[] | .code] | group_by(.) | map("\(.[0]) \(length)")[]' "$D/new_fixable_issues.json" > "$D/delta_rule_fixable.txt" 2>/dev/null || touch "$D/delta_rule_fixable.txt" +cat "$D/pr_all_rules.txt" "$D/main_rule_counts.txt" | awk '{print $1}' | sort -u > "$D/all_rules.txt" + +# ============================================================================= +# Build rule rows (will be grouped by category later) +# Format: category|abs_delta|rule|desc|main_count|pr_count|delta_str|fix_str|url +# ============================================================================= +log_section "Building rule rows" + +> "$D/all_rule_rows.txt" + +while read rule; do + pr_count=$(grep "^$rule " "$D/pr_all_rules.txt" 2>/dev/null | awk '{print $2}' || true) + pr_count=${pr_count:-0} + main_count=$(grep "^$rule " "$D/main_rule_counts.txt" 2>/dev/null | awk '{print $2}' || true) + main_count=${main_count:-0} + delta=$((pr_count - main_count)) + [ "$delta" -eq 0 ] && continue + + # Get description from actual violation; strip backtick-quoted identifiers to keep it generic. + raw_desc=$(jq -rs --arg rule "$rule" '[.[] | select(.code == $rule)][0].message // empty' "$D/pr_ruff_output.json" 2>/dev/null || true) + raw_url=$(jq -rs --arg rule "$rule" '[.[] | select(.code == $rule)][0].url // empty' "$D/pr_ruff_output.json" 2>/dev/null || true) + + if [ -z "$raw_desc" ]; then + raw_desc=$(jq -rs --arg rule "$rule" '[.[] | select(.code == $rule)][0].message // empty' "$D/main_ruff_output.json" 2>/dev/null || true) + fi + + if [ -z "$raw_url" ]; then + raw_url=$(jq -rs --arg rule "$rule" '[.[] | select(.code == $rule)][0].url // empty' "$D/main_ruff_output.json" 2>/dev/null || true) + fi + + DESC=$(printf '%s' "$raw_desc" | sed 's/`[^`]*`//g; s/ */ /g; s/^ //; s/ $//' | tr -d '|\n\r') + URL="$raw_url" + + category=$(get_category_from_rule "$rule") + + if [ "$delta" -gt 0 ]; then + # NEW issue + fix_str=$(get_fix_str "$rule" "$delta" "false") + echo "new|$category|$delta|$rule|$DESC|$main_count|$pr_count|+$delta 🔴|$fix_str|$URL" >> "$D/all_rule_rows.txt" + else + # FIXED issue + abs_delta=$((-delta)) + echo "fixed|$category|$abs_delta|$rule|$DESC|$main_count|$pr_count|-$abs_delta 🟢||$URL" >> "$D/all_rule_rows.txt" + fi +done < "$D/all_rules.txt" + +# ============================================================================= +# Calculate total fix string for new issues +# ============================================================================= +if [ "$NEW_ISSUES" -gt 0 ] && [ "$DELTA_FIXABLE" -gt 0 ]; then + TOTAL_SAFE=$(jq '[.[] | select(.fix.applicability == "safe")] | length' "$D/new_fixable_issues.json" 2>/dev/null || echo '0') + TOTAL_UNSAFE=$(jq '[.[] | select(.fix.applicability == "unsafe")] | length' "$D/new_fixable_issues.json" 2>/dev/null || echo '0') + TOTAL_FIX_LABELS="" + [ "$TOTAL_SAFE" -gt 0 ] && TOTAL_FIX_LABELS="$TOTAL_SAFE Safe" + [ "$TOTAL_UNSAFE" -gt 0 ] && TOTAL_FIX_LABELS="${TOTAL_FIX_LABELS:+$TOTAL_FIX_LABELS, }$TOTAL_UNSAFE Unsafe" + if [ "$TOTAL_SAFE" -gt 0 ]; then + TOTAL_FIX_STR="$TOTAL_FIX_LABELS ($DELTA_FIX_PCT%) ✅" + else + TOTAL_FIX_STR="$TOTAL_FIX_LABELS ($DELTA_FIX_PCT%) ❓" + fi +elif [ "$NEW_ISSUES" -gt 0 ]; then + TOTAL_FIX_STR="0 ❌" +else + TOTAL_FIX_STR="-" +fi + +# ============================================================================= +# Generate per-category NEW issues tables +# ============================================================================= +log_section "Building new issues tables" + +if [ "$NEW_ISSUES" -gt 0 ]; then + { + # Build category summary data (for sorting and summary table) + > "$D/new_category_summary.txt" + grep "^new|" "$D/all_rule_rows.txt" | cut -d'|' -f2 | sort -u | while read -r cat; do + cat_main=$(grep "^$cat " "$D/main_category_counts.txt" 2>/dev/null | awk '{print $2}' || echo "0") + cat_main=${cat_main:-0} + + cat_pr=$(grep "^$cat " "$D/pr_category_counts.txt" 2>/dev/null | awk '{print $2}' || echo "0") + cat_pr=${cat_pr:-0} + + # Sum positive rule deltas for this category; category net delta may be <= 0. + cat_delta=$(awk -F'|' -v cat="$cat" '$1 == "new" && $2 == cat {sum += $3} END {print sum + 0}' "$D/all_rule_rows.txt") + + cat_info=$(get_category_info "$cat") + cat_display=$(echo "$cat_info" | cut -d'|' -f1) + cat_tool=$(echo "$cat_info" | cut -d'|' -f2) + cat_fix_str=$(get_fix_str "$cat" "$cat_delta" "true") + echo "$cat_delta|$cat|$cat_display|$cat_tool|$cat_main|$cat_pr|$cat_fix_str" + done | sort -t'|' -k1 -rn > "$D/new_category_summary.txt" + + # Generate per-category tables + while IFS='|' read -r cat_delta cat_code cat_display cat_tool cat_main cat_pr cat_fix_str; do + echo "### $cat_display — $cat_tool" + echo "" + echo "| Rule | Description | Main | PR | Δ | Auto-fixable |" + echo "|------|-------------|-----:|---:|--:|-------------:|" + + grep "^new|$cat_code|" "$D/all_rule_rows.txt" | sort -t'|' -k3 -rn | while IFS='|' read -r _ _ _ rule desc main_count pr_count delta_str fix_str url; do + if [ -n "$url" ]; then + rule_link="[$rule]($url)" + else + rule_link="$rule" + fi + echo "| $rule_link | $desc | $main_count | $pr_count | $delta_str | $fix_str |" + done + + echo "| **Subtotal** | | **$cat_main** | **$cat_pr** | **+$cat_delta 🔴** | **$cat_fix_str** |" + echo "" + done < "$D/new_category_summary.txt" + + # Summary table + echo "### Summary" + echo "" + echo "| Category | Tool | Main | PR | Δ | Auto-fixable |" + echo "|----------|------|-----:|---:|--:|-------------:|" + while IFS='|' read -r cat_delta cat_code cat_display cat_tool cat_main cat_pr cat_fix_str; do + echo "| $cat_display | $cat_tool | $cat_main | $cat_pr | +$cat_delta 🔴 | $cat_fix_str |" + done < "$D/new_category_summary.txt" + echo "| **Total** | | **$MAIN_TOTAL** | **$PR_TOTAL** | **+$NEW_ISSUES 🔴** | **$TOTAL_FIX_STR** |" + + } > "$D/new_table.md" + CAT_COUNT=$(wc -l < "$D/new_category_summary.txt" | tr -d ' ') + echo " → new_table.md: $NEW_ISSUES issues across $CAT_COUNT categories" +else + > "$D/new_table.md" + echo " → new_table.md: empty (no new issues)" +fi + +# ============================================================================= +# Generate per-category FIXED issues tables +# ============================================================================= +log_section "Building fixed issues tables" + +if [ "$FIXED_ISSUES" -gt 0 ]; then + { + # Build category summary data (for sorting and summary table) + > "$D/fixed_category_summary.txt" + grep "^fixed|" "$D/all_rule_rows.txt" | cut -d'|' -f2 | sort -u | while read cat; do + cat_main=$(grep "^$cat " "$D/main_category_counts.txt" 2>/dev/null | awk '{print $2}' || echo "0") + cat_main=${cat_main:-0} + cat_pr=$(grep "^$cat " "$D/pr_category_counts.txt" 2>/dev/null | awk '{print $2}' || echo "0") + cat_pr=${cat_pr:-0} + cat_delta=$((cat_pr - cat_main)) + abs_delta=$((-cat_delta)) + cat_info=$(get_category_info "$cat") + cat_display=$(echo "$cat_info" | cut -d'|' -f1) + cat_tool=$(echo "$cat_info" | cut -d'|' -f2) + echo "$abs_delta|$cat|$cat_display|$cat_tool|$cat_main|$cat_pr" + done | sort -t'|' -k1 -rn > "$D/fixed_category_summary.txt" + + # Generate per-category tables + while IFS='|' read -r abs_delta cat_code cat_display cat_tool cat_main cat_pr; do + echo "### $cat_display — $cat_tool" + echo "" + echo "| Rule | Description | Main | PR | Δ |" + echo "|------|-------------|-----:|---:|--:|" + + grep "^fixed|$cat_code|" "$D/all_rule_rows.txt" | sort -t'|' -k3 -rn | while IFS='|' read -r _ _ _ rule desc main_count pr_count delta_str _ url; do + if [ -n "$url" ]; then + rule_link="[$rule]($url)" + else + rule_link="$rule" + fi + echo "| $rule_link | $desc | $main_count | $pr_count | $delta_str |" + done + + echo "| **Subtotal** | | **$cat_main** | **$cat_pr** | **-$abs_delta 🟢** |" + echo "" + done < "$D/fixed_category_summary.txt" + + # Summary table + echo "### Summary" + echo "" + echo "| Category | Tool | Main | PR | Δ |" + echo "|----------|------|-----:|---:|--:|" + while IFS='|' read -r abs_delta cat_code cat_display cat_tool cat_main cat_pr; do + echo "| $cat_display | $cat_tool | $cat_main | $cat_pr | -$abs_delta 🟢 |" + done < "$D/fixed_category_summary.txt" + echo "| **Total** | | **$MAIN_TOTAL** | **$PR_TOTAL** | **-$FIXED_ISSUES 🟢** |" + + } > "$D/fixed_table.md" + CAT_COUNT=$(wc -l < "$D/fixed_category_summary.txt" | tr -d ' ') + echo " → fixed_table.md: $FIXED_ISSUES issues across $CAT_COUNT categories" +else + > "$D/fixed_table.md" + echo " → fixed_table.md: empty (no fixed issues)" +fi + +log_section "Tables built" diff --git a/.github/scripts/ruff/collect-stats.sh b/.github/scripts/ruff/collect-stats.sh new file mode 100755 index 0000000..7601bd5 --- /dev/null +++ b/.github/scripts/ruff/collect-stats.sh @@ -0,0 +1,346 @@ +#!/bin/bash +# ============================================================================= +# collect-stats.sh - Ruff Statistics Collection +# ============================================================================= +# Analyzes PR and main branches, calculates delta statistics. +# +# Prerequisites: +# - Git repository with origin/main reference +# - RUFF_OUTPUT_FORMAT should be unset (caller's responsibility) +# +# Outputs (in .ruff-stats/): +# - stats.env: All computed variables for GitHub outputs +# - Various intermediate files for table generation +# +# Exit codes: +# 0: Success (stats collected) +# Non-zero: Script error (file operations, jq parsing, etc.) +# +# ============================================================================= +# KEY DESIGN DECISIONS +# ============================================================================= +# +# 1. DELTA-BASED PHILOSOPHY (Critical) +# - NEW_ISSUES = sum of positive rule deltas (issues YOU introduced) +# - FIXED_ISSUES = sum of negative rule deltas (issues YOU fixed) +# - INHERITED_ISSUES = pre-existing issues in modified code +# - Workflow PASSES if NEW_ISSUES = 0, regardless of INHERITED_ISSUES +# - Rationale: Contributors shouldn't be blocked for pre-existing problems +# +# 2. RULE-LEVEL DELTA CALCULATION (Critical) +# - Deltas calculated from individual rules, NOT categories +# - Prevents masked regressions (e.g., RUF022 +4 hidden by RUF overall -6) +# - Category-level calculation misses regressions within improving categories +# +# 3. AUTO-FIX SCOPING (NEW issues only) +# - delta_fixable_issues.json filtered to PR-changed files ONLY +# - Further filtered to rules with positive deltas (NEW issues only) +# - Rationale: Don't recommend auto-fixing inherited issues or unrelated files +# - DELTA_FIX_PCT = percentage of NEW issues that are auto-fixable +# +# 4. FILES_FILTER (Path Matching) +# - Built from: git diff --name-only origin/main...HEAD +# - Ruff outputs absolute paths, git outputs relative paths +# - Must strip workspace prefix from ruff paths before matching +# +# ============================================================================= + +set -euo pipefail + +# Load shared utilities +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "$SCRIPT_DIR/lib.sh" + +# All temp files go in .ruff-stats/ to avoid clutter +D=".ruff-stats" +mkdir -p "$D" +# Preserve format_*.txt artifacts created earlier in the workflow; clear everything else. + find "$D" -maxdepth 1 -type f ! -name 'format_*' -delete 2>/dev/null || true + +# ============================================================================= +# PR Branch Analysis +# ============================================================================= +log_section "Analyzing PR branch" + +PR_BRANCH=$(git rev-parse --abbrev-ref HEAD 2>/dev/null || echo "unknown") +PR_SHA=$(git rev-parse --short HEAD 2>/dev/null || echo "unknown") +echo " → Branch: $PR_BRANCH @ $PR_SHA" + +ruff check . --output-format json-lines > "$D/pr_ruff_output.json" || true +validate_json "$D/pr_ruff_output.json" "PR output" + +# Extract statistics with jq +STATS=$(jq -s '{total: length, fixable: [.[] | select(.fix)] | length}' "$D/pr_ruff_output.json" 2>/dev/null || echo '{"total":0,"fixable":0}') +PR_TOTAL=$(echo "$STATS" | jq -r '.total') +PR_FIXABLE=$(echo "$STATS" | jq -r '.fixable') +echo " → Issues: $PR_TOTAL total, $PR_FIXABLE fixable" + +# Extract category stats for PR (maintain "CAT COUNT" format) +jq_count_by_category "$D/pr_ruff_output.json" \ + > "$D/pr_category_counts.txt" 2>/dev/null || touch "$D/pr_category_counts.txt" +log_file "$D/pr_category_counts.txt" "Categories" + +# Count fixable per category (maintain "CAT COUNT" format) +jq_count_fixable_by_category "$D/pr_ruff_output.json" \ + > "$D/pr_category_fixable.txt" 2>/dev/null || touch "$D/pr_category_fixable.txt" + +# Extract top 5 specific rules (maintain "COUNT RULE" format for sorting) +jq -rs '[.[] | .code] | group_by(.) | map("\(length) \(.[0])")[]' "$D/pr_ruff_output.json" | \ + sort -rn | head -5 > "$D/pr_top_rules.txt" 2>/dev/null || touch "$D/pr_top_rules.txt" +if [ -s "$D/pr_top_rules.txt" ]; then + echo " → Top rules: $(head -1 "$D/pr_top_rules.txt" | awk '{print $2 " (" $1 ")"}')" +fi + +# ============================================================================= +# Main Branch Analysis +# ============================================================================= +log_section "Analyzing main branch" + +# Stash any uncommitted changes +git stash --include-untracked --quiet 2>/dev/null || true + +# Checkout main branch +if git checkout origin/main --quiet 2>/dev/null; then + MAIN_REF="origin/main" +elif git checkout main --quiet 2>/dev/null; then + MAIN_REF="main" +else + log_error "Failed to checkout main branch" + exit 1 +fi +MAIN_SHA=$(git rev-parse --short HEAD 2>/dev/null || echo "unknown") +echo " → Branch: $MAIN_REF @ $MAIN_SHA" + +ruff check . --output-format json-lines > "$D/main_ruff_output.json" || true +validate_json "$D/main_ruff_output.json" "Main output" + +# Extract statistics with jq +MAIN_TOTAL=$(jq -s 'length' "$D/main_ruff_output.json" 2>/dev/null || echo '0') +echo " → Issues: $MAIN_TOTAL total" + +# Extract category stats for main (maintain "CAT COUNT" format) +jq_count_by_category "$D/main_ruff_output.json" \ + > "$D/main_category_counts.txt" 2>/dev/null || touch "$D/main_category_counts.txt" + +# Return to PR branch +git checkout - --quiet 2>/dev/null || log_warn "Failed to checkout previous branch" +git stash pop --quiet 2>/dev/null || true + +# ============================================================================= +# New vs Inherited Analysis +# ============================================================================= +log_section "Calculating deltas" + +# Get files changed in this PR +git diff --name-only origin/main...HEAD > "$D/pr_changed_files.txt" +CHANGED_FILES=$(wc -l < "$D/pr_changed_files.txt" | tr -d ' ') +echo " → Changed files: $CHANGED_FILES" + +# Build jq filter for changed files +FILES_FILTER=$(text_to_json_array "$D/pr_changed_files.txt") + +# Calculate NEW_ISSUES = sum of positive deltas (issues you introduced) +# Calculate FIXED_ISSUES = sum of negative deltas (issues you fixed) +# IMPORTANT: Must use rule-level deltas, not category-level, to catch regressions +# in individual rules that are masked by category-level improvements + +# Get rule counts for both branches first (needed below) +jq_count_by_rule "$D/pr_ruff_output.json" \ + > "$D/pr_all_rules_early.txt" 2>/dev/null || touch "$D/pr_all_rules_early.txt" +jq_count_by_rule "$D/main_ruff_output.json" \ + > "$D/main_all_rules_early.txt" 2>/dev/null || touch "$D/main_all_rules_early.txt" + +PR_RULES=$(wc -l < "$D/pr_all_rules_early.txt" | tr -d ' ') +MAIN_RULES=$(wc -l < "$D/main_all_rules_early.txt" | tr -d ' ') +echo " → Unique rules: PR=$PR_RULES, Main=$MAIN_RULES" + +# Calculate from rule-level deltas +rm -f "$D/positive_deltas.txt" "$D/negative_deltas.txt" "$D/rules_with_new_issues.txt" +touch "$D/positive_deltas.txt" "$D/negative_deltas.txt" # Ensure files exist (may remain empty) + +cat "$D/pr_all_rules_early.txt" "$D/main_all_rules_early.txt" | \ + awk '{print $1}' | sort -u | while read rule; do + # Use || true to prevent grep exit code 1 from killing script (pipefail) + pr_count=$(grep "^$rule " "$D/pr_all_rules_early.txt" 2>/dev/null | awk '{print $2}' || true) + pr_count=${pr_count:-0} + main_count=$(grep "^$rule " "$D/main_all_rules_early.txt" 2>/dev/null | awk '{print $2}' || true) + main_count=${main_count:-0} + delta=$((pr_count - main_count)) + + if [ "$delta" -gt 0 ]; then + echo "$delta" >> "$D/positive_deltas.txt" + echo "$rule" >> "$D/rules_with_new_issues.txt" + elif [ "$delta" -lt 0 ]; then + echo "$((- delta))" >> "$D/negative_deltas.txt" # Store absolute value + fi +done + +# Sum deltas from files (files guaranteed to exist, may be empty) +NEW_ISSUES=$(awk '{sum+=$1} END {print sum+0}' "$D/positive_deltas.txt") +FIXED_ISSUES=$(awk '{sum+=$1} END {print sum+0}' "$D/negative_deltas.txt") + +# INHERITED_ISSUES = issues that existed on main and still exist in PR +# Formula: MAIN_TOTAL - FIXED_ISSUES = issues from main that weren't fixed +INHERITED_ISSUES=$((MAIN_TOTAL - FIXED_ISSUES)) +if [ "$INHERITED_ISSUES" -lt 0 ]; then + INHERITED_ISSUES=0 +fi + +# Show delta breakdown with visual indicators +if [ "$NEW_ISSUES" -gt 0 ]; then + echo " 🔴 New issues: $NEW_ISSUES" + if [ -f "$D/rules_with_new_issues.txt" ]; then + echo " Rules: $(paste -sd', ' "$D/rules_with_new_issues.txt")" + fi +else + echo " ✓ New issues: 0" +fi + +if [ "$FIXED_ISSUES" -gt 0 ]; then + echo " 🟢 Fixed issues: $FIXED_ISSUES" +else + echo " → Fixed issues: 0" +fi + +echo " → Inherited: $INHERITED_ISSUES" + +# ============================================================================= +# Calculate Deltas +# ============================================================================= +DELTA=$((PR_TOTAL - MAIN_TOTAL)) +if [ "$DELTA" -gt 0 ]; then + DELTA_STR="+$DELTA from main" +elif [ "$DELTA" -lt 0 ]; then + DELTA_STR="**$DELTA from main** 🎉" +else + DELTA_STR="same as main" +fi + +if [ "$PR_TOTAL" -gt 0 ] && [ "$PR_FIXABLE" -gt 0 ]; then + FIX_PCT=$((PR_FIXABLE * 100 / PR_TOTAL)) +else + FIX_PCT=0 +fi + +# ============================================================================= +# Calculate Delta Fixable (fixable issues in files changed by PR) +# ============================================================================= +log_section "Analyzing auto-fixable issues" + +# Get fixable issues only in PR-changed files using jq +# Note: ruff outputs absolute paths, git diff outputs relative paths +# We need to convert ruff's absolute paths to relative before matching +WORKSPACE=$(get_workspace_prefix) +jq -s --argjson files "$FILES_FILTER" --arg workspace "$WORKSPACE" \ + '[.[] | select(.fix and (.filename | sub($workspace; "") as $rel | $files | index($rel)))]' \ + "$D/pr_ruff_output.json" > "$D/delta_fixable_issues.json" + +DELTA_FIXABLE_IN_CHANGED=$(jq 'length' "$D/delta_fixable_issues.json") +echo " → Fixable in changed files: $DELTA_FIXABLE_IN_CHANGED" + +# ============================================================================= +# Filter to NEW fixable issues only (not inherited fixable) +# ============================================================================= +# Filter delta_fixable_issues.json to only NEW fixable (rules with positive deltas) +if [ -f "$D/rules_with_new_issues.txt" ] && [ -s "$D/rules_with_new_issues.txt" ]; then + # Build jq array of rules with new issues + RULES_WITH_NEW=$(text_to_json_array "$D/rules_with_new_issues.txt") + jq --argjson rules "$RULES_WITH_NEW" \ + '[.[] | select(.code as $c | $rules | index($c))]' \ + "$D/delta_fixable_issues.json" > "$D/new_fixable_issues.json" +else + echo '[]' > "$D/new_fixable_issues.json" +fi + +# Count NEW fixable issues by category +jq -r '[.[] | .code | gsub("[0-9].*$"; "")] | group_by(.) | map("\(.[0]) \(length)")[]' "$D/new_fixable_issues.json" \ + > "$D/delta_category_fixable.txt" 2>/dev/null || touch "$D/delta_category_fixable.txt" + +# ============================================================================= +# Calculate Delta Fixable Total (auto-fixable NEW issues only) +# ============================================================================= +DELTA_FIXABLE=$(jq 'length' "$D/new_fixable_issues.json") + +# Calculate percentage of NEW issues that are auto-fixable +if [ "$NEW_ISSUES" -gt 0 ] && [ "$DELTA_FIXABLE" -gt 0 ]; then + DELTA_FIX_PCT=$((DELTA_FIXABLE * 100 / NEW_ISSUES)) + echo " → Auto-fixable NEW issues: $DELTA_FIXABLE of $NEW_ISSUES ($DELTA_FIX_PCT%)" +else + DELTA_FIX_PCT=0 + if [ "$NEW_ISSUES" -gt 0 ]; then + echo " ⚠️ None of the $NEW_ISSUES new issues are auto-fixable" + else + echo " ✓ No new issues to fix" + fi +fi + +# ============================================================================= +# Detect Error-Prone Rules (B=Bugbear, F=Pyflakes) +# ============================================================================= +# These rules catch real bugs, not just style issues +HAS_ERROR_PRONE="false" +ERROR_PRONE_RULES="" + +# Check if B or F categories have positive delta +for cat in B F; do + pr_count=$(grep "^$cat " "$D/pr_category_counts.txt" 2>/dev/null | awk '{print $2}' || true) + pr_count=${pr_count:-0} + main_count=$(grep "^$cat " "$D/main_category_counts.txt" 2>/dev/null | awk '{print $2}' || true) + main_count=${main_count:-0} + delta=$((pr_count - main_count)) + + if [ "$delta" -gt 0 ]; then + HAS_ERROR_PRONE="true" + # Map category code to full name + case "$cat" in + F) cat_display="Pyflakes (F)" ;; + B) cat_display="Bugbear (B)" ;; + *) cat_display="$cat" ;; + esac + if [ -z "$ERROR_PRONE_RULES" ]; then + ERROR_PRONE_RULES="$cat_display (+$delta)" + else + ERROR_PRONE_RULES="$ERROR_PRONE_RULES, $cat_display (+$delta)" + fi + fi +done + +if [ "$HAS_ERROR_PRONE" = "true" ]; then + echo "" + echo " 🐛 Error-prone rules detected: $ERROR_PRONE_RULES" + echo " These catch real bugs, not just style issues!" +fi + +# ============================================================================= +# Summary +# ============================================================================= +log_section "Summary" +echo " PR: $PR_TOTAL issues ($PR_FIXABLE fixable)" +echo " Main: $MAIN_TOTAL issues" +echo " Delta: $DELTA_STR" +echo "" +echo " New: $NEW_ISSUES (you introduced)" +echo " Fixed: $FIXED_ISSUES (you fixed)" +echo " Inherited: $INHERITED_ISSUES (pre-existing)" + +# ============================================================================= +# Export to stats.env (for sourcing by workflow) +# Quote values to handle spaces/special chars in DELTA_STR and ERROR_PRONE_RULES +# ============================================================================= +cat > "$D/stats.env" << EOF +PR_TOTAL="$PR_TOTAL" +PR_FIXABLE="$PR_FIXABLE" +MAIN_TOTAL="$MAIN_TOTAL" +DELTA="$DELTA" +DELTA_STR="$DELTA_STR" +DELTA_FIXABLE="$DELTA_FIXABLE" +DELTA_FIX_PCT="$DELTA_FIX_PCT" +FIX_PCT="$FIX_PCT" +NEW_ISSUES="$NEW_ISSUES" +FIXED_ISSUES="$FIXED_ISSUES" +INHERITED_ISSUES="$INHERITED_ISSUES" +HAS_ERROR_PRONE="$HAS_ERROR_PRONE" +ERROR_PRONE_RULES="$ERROR_PRONE_RULES" +EOF + +log_section "Stats exported to $D/stats.env" diff --git a/.github/scripts/ruff/lib.sh b/.github/scripts/ruff/lib.sh new file mode 100644 index 0000000..ebb496e --- /dev/null +++ b/.github/scripts/ruff/lib.sh @@ -0,0 +1,198 @@ +#!/bin/bash +# ============================================================================= +# lib.sh - Shared utilities for ruff scripts +# ============================================================================= +# Common functions for logging and validation. +# Default output is maximally useful - no flags needed. +# +# Usage: source "$(dirname "$0")/lib.sh" +# +# ============================================================================= + +# ============================================================================= +# Logging - always outputs useful info, no flags +# ============================================================================= + +# Section header +log_section() { + echo "=== $* ===" +} + +# Checkpoint: key metric with optional warning threshold +# Usage: log_checkpoint "description" [warn_if_zero] +log_checkpoint() { + local desc="$1" + local value="$2" + local warn_if_zero="${3:-false}" + + if [ "$warn_if_zero" = "true" ] && [ "$value" = "0" ]; then + echo " ⚠️ $desc: $value" + else + echo " → $desc: $value" + fi +} + +# Warning: always shown, prefixed for visibility +log_warn() { + echo "⚠️ $*" >&2 +} + +# Error: always shown to stderr +log_error() { + echo "❌ ERROR: $*" >&2 +} + +# Success indicator +log_ok() { + echo " ✓ $*" +} + +# ============================================================================= +# File inspection - concise output +# ============================================================================= + +# Log file line count, warn if empty when shouldn't be +# Usage: log_file [warn_if_empty] +log_file() { + local file="$1" + local desc="$2" + local warn_if_empty="${3:-false}" + + if [ ! -f "$file" ]; then + log_error "$desc: FILE NOT FOUND" + return 1 + fi + + local lines + lines=$(wc -l < "$file" | tr -d ' ') + + if [ "$lines" -eq 0 ] && [ "$warn_if_empty" = "true" ]; then + echo " ⚠️ $desc: empty" + else + echo " → $desc: $lines lines" + fi +} + +# Log JSON-lines file with issue count +# Usage: log_issues +log_issues() { + local file="$1" + local desc="$2" + + if [ ! -f "$file" ]; then + log_error "$desc: FILE NOT FOUND" + return 1 + fi + + local count + count=$(wc -l < "$file" | tr -d ' ') + echo " → $desc: $count issues" +} + +# ============================================================================= +# Validation helpers +# ============================================================================= + +# Validate JSON file, show error details if invalid +# Usage: validate_json +# Returns: 0 if valid, 1 if invalid +validate_json() { + local file="$1" + local desc="$2" + + # Ruff emits JSON-lines; when there are 0 issues, it may be an empty file. + if [ ! -s "$file" ]; then + log_ok "$desc: empty (0 issues)" + return 0 + elif jq empty "$file" >/dev/null 2>&1; then + log_ok "$desc: valid JSON" + return 0 + else + log_error "$desc: INVALID JSON - downstream processing will fail" + echo " First 3 lines:" >&2 + head -3 "$file" | sed 's/^/ /' >&2 + return 1 + fi +} + +# Check if file has expected minimum lines +# Usage: expect_lines +expect_lines() { + local file="$1" + local min="$2" + local desc="$3" + + local lines + lines=$(wc -l < "$file" 2>/dev/null | tr -d ' ') + lines=${lines:-0} + + if [ "$lines" -lt "$min" ]; then + log_warn "$desc: only $lines lines (expected at least $min)" + fi +} + +# ============================================================================= +# Bot commands reference (single source of truth) +# ============================================================================= +# Used by: build-comment.sh (PR comments) +# Referenced by: ruff-commands.yml (header docs) + +# Print bot commands table in markdown format +print_bot_commands_markdown() { + cat << 'EOF' +| Command | Description | +|---------|-------------| +| `check` | Run lint check only (no changes) | +| `check --fix` | Auto-fix lint issues, commit to PR | +| `format` | Auto-format code, commit to PR | +| `format --check` | Check formatting only (no changes) | +| `fix` | **Recommended:** check --fix + format | +EOF +} + +# ============================================================================= +# Path utilities +# ============================================================================= + +# Get workspace prefix for jq --arg (includes trailing slash) +# Usage: ws=$(get_workspace_prefix) +# Returns: "/path/to/workspace/" or "$(pwd)/" if GITHUB_WORKSPACE unset +get_workspace_prefix() { + echo "${GITHUB_WORKSPACE:-$(pwd)}/" +} + +# ============================================================================= +# jq helpers for rule extraction +# ============================================================================= + +# Count occurrences grouped by rule code +# Input: JSON-lines ruff output file +# Output: "RULE COUNT" per line (e.g., "F401 5") +jq_count_by_rule() { + local input="$1" + jq -rs '[.[] | .code] | group_by(.) | map("\(.[0]) \(length)")[]' "$input" +} + +# Count occurrences grouped by category (rule prefix before digits) +# Input: JSON-lines ruff output file +# Output: "CATEGORY COUNT" per line (e.g., "F 12") +jq_count_by_category() { + local input="$1" + jq -rs '[.[] | .code | gsub("[0-9].*$"; "")] | group_by(.) | map("\(.[0]) \(length)")[]' "$input" +} + +# Count fixable issues grouped by category +# Input: JSON-lines ruff output file +# Output: "CATEGORY COUNT" per line (fixable only) +jq_count_fixable_by_category() { + local input="$1" + jq -rs '[.[] | select(.fix) | .code | gsub("[0-9].*$"; "")] | group_by(.) | map("\(.[0]) \(length)")[]' "$input" +} + +# Convert newline-separated text file to JSON array +# Input: text file path +# Output: JSON array of non-empty lines +text_to_json_array() { + local file="$1" + jq -R -s 'split("\n") | map(select(length > 0))' "$file" +} diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml new file mode 100644 index 0000000..4c819db --- /dev/null +++ b/.github/workflows/mypy.yml @@ -0,0 +1,61 @@ +name: Mypy + +on: + pull_request: + branches: [main] + workflow_dispatch: + +concurrency: + group: mypy-pr-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + check: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: false + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r PySpotObserver/requirements.txt + + - name: Run mypy delta check + run: | + set -euo pipefail + mkdir -p .mypy-stats + + echo "Running mypy on PR branch..." + mypy . --show-error-codes --no-error-summary --no-pretty > .mypy-stats/pr.txt || true + + git checkout origin/main --quiet + + echo "Running mypy on main..." + mypy . --show-error-codes --no-error-summary --no-pretty > .mypy-stats/main.txt || true + + git checkout - --quiet + + # Normalize output enough for stable comparison. + # Keeps file, line, message, and error code. + sort .mypy-stats/pr.txt > .mypy-stats/pr.sorted.txt + sort .mypy-stats/main.txt > .mypy-stats/main.sorted.txt + + comm -23 .mypy-stats/pr.sorted.txt .mypy-stats/main.sorted.txt > .mypy-stats/new.txt + comm -13 .mypy-stats/pr.sorted.txt .mypy-stats/main.sorted.txt > .mypy-stats/fixed.txt + + NEW=$(wc -l < .mypy-stats/new.txt | tr -d ' ') + FIXED=$(wc -l < .mypy-stats/fixed.txt | tr -d ' ') + + echo "New mypy errors: $NEW" + echo "Fixed mypy errors: $FIXED" + + if [ "$NEW" -gt 0 ]; then + echo "" + echo "New mypy errors introduced by this PR:" + cat .mypy-stats/new.txt + exit 1 + fi \ No newline at end of file diff --git a/.github/workflows/ruff-commands.yml b/.github/workflows/ruff-commands.yml new file mode 100644 index 0000000..e517566 --- /dev/null +++ b/.github/workflows/ruff-commands.yml @@ -0,0 +1,407 @@ +# ============================================================================= +# Ruff Slash Commands Workflow +# ============================================================================= +# Triggered by PR comments with @ruff or /ruff commands. +# Matches actual ruff CLI syntax. +# +# Commands (canonical source: .github/scripts/ruff/lib.sh:print_bot_commands_markdown): +# check - Run lint check (adds 🚀 or ❌ reaction) +# check --fix - Auto-fix lint issues, commit to PR branch +# format - Auto-format code, commit to PR branch +# format --check - Check formatting only (no changes) +# fix - Shorthand for: check --fix && format +# +# Prefix with @ruff or /ruff in PR comments (e.g., /ruff check --fix) +# +# ============================================================================= + +name: Ruff Commands + +on: + issue_comment: + types: [created] + +jobs: + # =========================================================================== + # Parse command from PR comment + # =========================================================================== + parse-command: + if: > + github.event.issue.pull_request && ( + startsWith(github.event.comment.body, '/ruff') || + startsWith(github.event.comment.body, '@ruff') + ) && contains(fromJson('["OWNER", "MEMBER", "COLLABORATOR"]'), github.event.comment.author_association) + runs-on: ubuntu-latest + outputs: + command: ${{ steps.parse.outputs.command }} + pr_number: ${{ steps.pr.outputs.number }} + pr_ref: ${{ steps.pr.outputs.ref }} + pr_head_ref: ${{ steps.pr.outputs.head_ref }} + steps: + - name: Parse command + id: parse + env: + COMMENT_BODY: ${{ github.event.comment.body }} + run: | + COMMENT="$COMMENT_BODY" + # Normalize: strip @ruff or /ruff prefix + CMD=$(echo "$COMMENT" | sed -E 's/^[@/]ruff[[:space:]]*//') + echo "Normalized command: '$CMD'" + + # Match CLI-style commands + if [[ "$CMD" == "fix"* ]]; then + # Shorthand for check --fix && format + echo "command=fix-all" >> "$GITHUB_OUTPUT" + elif [[ "$CMD" == "check --fix"* ]] || [[ "$CMD" == "check-fix"* ]]; then + echo "command=check-fix" >> "$GITHUB_OUTPUT" + elif [[ "$CMD" == "check"* ]]; then + echo "command=check" >> "$GITHUB_OUTPUT" + elif [[ "$CMD" == "format --check"* ]] || [[ "$CMD" == "format-check"* ]]; then + echo "command=format-check" >> "$GITHUB_OUTPUT" + elif [[ "$CMD" == "format"* ]]; then + echo "command=format" >> "$GITHUB_OUTPUT" + else + # Default: just @ruff or /ruff with no subcommand = check + echo "command=check" >> "$GITHUB_OUTPUT" + fi + echo "Parsed: $(grep command "$GITHUB_OUTPUT")" + + - name: Get PR details + id: pr + uses: actions/github-script@v7 + with: + script: | + const { data: pr } = await github.rest.pulls.get({ + owner: context.repo.owner, + repo: context.repo.repo, + pull_number: context.issue.number + }); + core.setOutput('ref', pr.head.sha); + core.setOutput('number', pr.number); + core.setOutput('head_ref', pr.head.ref); + console.log(`PR #${pr.number}: ${pr.head.ref} @ ${pr.head.sha}`); + + - name: Add eyes reaction + uses: actions/github-script@v7 + with: + script: | + await github.rest.reactions.createForIssueComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: context.payload.comment.id, + content: 'eyes' + }); + + # =========================================================================== + # Check job: @ruff check or @ruff format --check + # =========================================================================== + check: + needs: [parse-command] + if: > + needs.parse-command.outputs.command == 'check' || + needs.parse-command.outputs.command == 'format-check' + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: write + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + ref: ${{ needs.parse-command.outputs.pr_ref }} + + - name: Install ruff + run: pip install "ruff==0.15.2" + + - name: Run ruff check + id: ruff_check + if: needs.parse-command.outputs.command == 'check' + continue-on-error: true + run: ruff check . --output-format github + + - name: Run ruff format --check + id: ruff_format + if: needs.parse-command.outputs.command == 'format-check' + continue-on-error: true + run: ruff format --check . + + - name: Add result reaction + id: result_reaction + uses: actions/github-script@v7 + with: + script: | + const checkOutcome = '${{ steps.ruff_check.outcome }}'; + const formatOutcome = '${{ steps.ruff_format.outcome }}'; + const command = '${{ needs.parse-command.outputs.command }}'; + + // Each command only cares about its own concern + // - check: lint only + // - format-check: format only + let success; + if (command === 'format-check') { + success = formatOutcome !== 'failure'; + } else { + success = checkOutcome !== 'failure'; // Lint only + } + + await github.rest.reactions.createForIssueComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: context.payload.comment.id, + content: success ? 'rocket' : 'confused' + }); + + core.setOutput('success', success); + core.setOutput('check_failed', checkOutcome === 'failure'); + core.setOutput('format_failed', formatOutcome === 'failure'); + + - name: Explain failure + if: steps.result_reaction.outputs.success == 'false' + uses: actions/github-script@v7 + with: + script: | + const command = '${{ needs.parse-command.outputs.command }}'; + const checkFailed = '${{ steps.result_reaction.outputs.check_failed }}' === 'true'; + const formatFailed = '${{ steps.result_reaction.outputs.format_failed }}' === 'true'; + const runUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${{ github.run_id }}`; + + // Only mention failures relevant to the command + let message = '😕 **Issues remain after running command.**\n\n'; + if (command === 'format-check' && formatFailed) { + message += '- ❌ Format check found issues\n'; + } else if (command === 'check' && checkFailed) { + message += '- ❌ Lint check found issues (may need manual fix)\n'; + } + message += `\n[View full logs →](${runUrl})`; + + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: message + }); + + # =========================================================================== + # Check-fix job: @ruff check --fix + # =========================================================================== + check-fix: + needs: [parse-command] + if: > + needs.parse-command.outputs.command == 'check-fix' || + needs.parse-command.outputs.command == 'fix-all' + runs-on: ubuntu-latest + permissions: + contents: write + pull-requests: write + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ needs.parse-command.outputs.pr_head_ref }} + fetch-depth: 0 + + - name: Install ruff + run: pip install "ruff==0.15.2" + + - name: Run ruff check --fix + run: ruff check --fix . || true + + - name: Commit and push + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add -A + if git diff --staged --quiet; then + echo "No changes to commit" + else + git commit -m "ruff check --fix" + git push + fi + + # =========================================================================== + # Format job: @ruff format or part of @ruff fix + # =========================================================================== + format: + needs: [parse-command, check-fix] + # Run if format command, OR after check-fix for fix-all + # Use always() to run even if check-fix was skipped + if: > + always() && ( + needs.parse-command.outputs.command == 'format' || + needs.parse-command.outputs.command == 'fix-all' + ) + runs-on: ubuntu-latest + permissions: + contents: write + pull-requests: write + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ needs.parse-command.outputs.pr_head_ref }} + fetch-depth: 0 + + - name: Pull latest (in case check-fix job pushed) + run: git pull --rebase origin ${{ needs.parse-command.outputs.pr_head_ref }} || true + + - name: Install ruff + run: pip install "ruff==0.15.2" + + - name: Run ruff format + run: ruff format . + + - name: Commit and push + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add -A + if git diff --staged --quiet; then + echo "No changes to commit" + else + git commit -m "ruff format" + git push + fi + + # =========================================================================== + # Post-fix reaction: add emoji after fix/format completes + # =========================================================================== + post-fix-reaction: + needs: [parse-command, check-fix, format] + if: > + always() && ( + needs.parse-command.outputs.command == 'check-fix' || + needs.parse-command.outputs.command == 'format' || + needs.parse-command.outputs.command == 'fix-all' + ) + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: write + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ needs.parse-command.outputs.pr_head_ref }} + fetch-depth: 0 + + - name: Pull latest + run: git pull --rebase origin ${{ needs.parse-command.outputs.pr_head_ref }} || true + + - name: Install ruff + run: pip install "ruff==0.15.2" + + - name: Check final status + id: final_check + env: + COMMAND: ${{ needs.parse-command.outputs.command }} + run: | + set -o pipefail + echo "Command: $COMMAND" + + # Default to true (not applicable) + echo "check_ok=true" >> "$GITHUB_OUTPUT" + echo "format_ok=true" >> "$GITHUB_OUTPUT" + + # Only check what's relevant to the command + case "$COMMAND" in + check-fix) + echo "Verifying: lint status" + if ruff check . --output-format concise 2>&1 | head -20; then + echo " ✓ Lint: passed" + echo "check_ok=true" >> "$GITHUB_OUTPUT" + else + echo " ✗ Lint: issues remain" + echo "check_ok=false" >> "$GITHUB_OUTPUT" + fi + ;; + format) + echo "Verifying: format status" + if ruff format --check . 2>&1 | head -20; then + echo " ✓ Format: passed" + echo "format_ok=true" >> "$GITHUB_OUTPUT" + else + echo " ✗ Format: issues remain" + echo "format_ok=false" >> "$GITHUB_OUTPUT" + fi + ;; + fix-all) + echo "Verifying: lint and format status" + if ruff check . --output-format concise 2>&1 | head -20; then + echo " ✓ Lint: passed" + echo "check_ok=true" >> "$GITHUB_OUTPUT" + else + echo " ✗ Lint: issues remain" + echo "check_ok=false" >> "$GITHUB_OUTPUT" + fi + if ruff format --check . 2>&1 | head -20; then + echo " ✓ Format: passed" + echo "format_ok=true" >> "$GITHUB_OUTPUT" + else + echo " ✗ Format: issues remain" + echo "format_ok=false" >> "$GITHUB_OUTPUT" + fi + ;; + esac + + - name: Add result reaction + id: result_reaction + uses: actions/github-script@v7 + with: + script: | + const command = '${{ needs.parse-command.outputs.command }}'; + const checkOk = '${{ steps.final_check.outputs.check_ok }}' === 'true'; + const formatOk = '${{ steps.final_check.outputs.format_ok }}' === 'true'; + + // Success based on what the command cares about + let success; + if (command === 'format') { + success = formatOk; + } else if (command === 'check-fix') { + success = checkOk; + } else { + // fix-all: both must pass + success = checkOk && formatOk; + } + + await github.rest.reactions.createForIssueComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: context.payload.comment.id, + content: success ? 'rocket' : 'confused' + }); + + core.setOutput('success', success); + core.setOutput('check_failed', !checkOk); + core.setOutput('format_failed', !formatOk); + + - name: Explain failure + if: steps.result_reaction.outputs.success == 'false' + uses: actions/github-script@v7 + with: + script: | + const command = '${{ needs.parse-command.outputs.command }}'; + const checkFailed = '${{ steps.result_reaction.outputs.check_failed }}' === 'true'; + const formatFailed = '${{ steps.result_reaction.outputs.format_failed }}' === 'true'; + const runUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${{ github.run_id }}`; + + // Only mention failures relevant to the command + let message = '😕 **Issues remain after running command.**\n\n'; + if (command === 'format') { + message += '- ❌ Format check found issues\n'; + } else if (command === 'check-fix') { + message += '- ❌ Lint check found issues (may need manual fix)\n'; + } else { + // fix-all: mention both if both failed + if (checkFailed && formatFailed) { + message += '- ❌ Lint check found issues (may need manual fix)\n- ❌ Format check found issues\n'; + } else if (checkFailed) { + message += '- ❌ Lint check found issues (may need manual fix)\n'; + } else if (formatFailed) { + message += '- ❌ Format check found issues\n'; + } + } + message += `\n[View full logs →](${runUrl})`; + + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: message + }); diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml new file mode 100644 index 0000000..89e5025 --- /dev/null +++ b/.github/workflows/ruff.yml @@ -0,0 +1,370 @@ +# ============================================================================= +# Ruff Linting & Formatting Workflow +# ============================================================================= +# Runs on every PR push. Posts/updates summary comment with ruff statistics. +# +# Installs Ruff via pip and runs custom scripts to compute a delta vs origin/main. +# Docs: https://docs.astral.sh/ruff/ +# For slash commands (/ruff fix, /ruff format), see: ruff-commands.yml +# +# ============================================================================= +# BEHAVIOR +# ============================================================================= +# +# Posts/updates summary comment with delta-based ruff statistics: +# - Fails ONLY if NEW issues introduced (delta-based, not absolute) +# - Rewards fixing inherited issues with celebration emoji +# - Contributors not blocked for pre-existing problems in modified code +# +# Comment includes: +# - Summary card: new/fixed/inherited issue breakdown +# - Category/rule delta tables with emoji prefixes +# - Top offender: rule with biggest regression +# - Context-aware tips and auto-fix recommendations +# +# ============================================================================= +# KEY DESIGN DECISIONS +# ============================================================================= +# +# 1. DELTA-BASED PHILOSOPHY (Critical) +# - Pass/fail based on NEW_ISSUES (sum of positive rule deltas) +# - NOT based on DELTA (net change in total issues) +# - Rationale: Contributors should only be blocked for issues THEY introduce +# - Example: Fix 392, introduce 4 → DELTA=-388 (pass?) NO! NEW_ISSUES=4 (fail) +# +# 2. RULE-LEVEL DELTA CALCULATION (Critical) +# - Must calculate deltas from individual rules, NOT categories +# - Prevents masked regressions: RUF022 +4 hidden by RUF overall -6 +# - Category-level calculation misses regressions within improving categories +# +# 3. FILES_FILTER (Auto-fix Scoping) +# - Filters fixable issues to PR-changed files only +# - Used for auto-fix recommendations (don't suggest fixing unrelated files) +# - Built from: git diff --name-only origin/main...HEAD +# +# 4. DESCRIPTION SANITIZATION +# - Removes |, newlines, backticks from rule descriptions +# - Prevents markdown table breakage in PR comments +# +# 5. ANNOTATIONS (NEW Issues Only) +# - Generated by dedicated annotate-new-issues.sh script +# - Runs AFTER collect-stats.sh to filter using rules_with_new_issues.txt +# - Only annotates rules with positive deltas (new issues) +# - Filters out inherited issues to highlight actual regressions +# - GitHub limit: 10 annotations/step, 50/workflow +# +# 6. EMOJI CODING SYSTEM +# - 🐛 Pyflakes/Bugbear: catches bugs +# - 🎨 pycodestyle: style errors/warnings +# - ⚡ pyupgrade/simplify: modern syntax +# - 📦 isort/Ruff: import/package organization +# - 📊 NumPy/Pandas: array/dataframe patterns +# - Emoji flush with tag: 🐛[F401] not [🐛 F401] +# +# ============================================================================= +# FUTURE ENHANCEMENTS (Not Implemented) +# ============================================================================= +# +# Category Delta Table - Additional columns proposed but not implemented: +# 1. Top Rule: Most common rule in category, e.g., "RUF022 (4)" +# 2. Files: Count of affected files in category, e.g., "15" +# +# Tradeoffs: +# - Pros: More context at a glance, helps identify issue concentration +# - Cons: Wider table (7 cols vs 5), more jq processing, rules table already shows details +# +# Current decision: Tables are fully functional, these are nice-to-have only +# +# ============================================================================= +# LOCAL USAGE +# ============================================================================= +# +# ruff check . # Check for linting issues +# ruff check --fix . # Auto-fix linting issues +# ruff format --check . # Check formatting +# ruff format . # Auto-format files +# +# Or all at once: +# ruff check --fix . && ruff format . +# +# TRIGGER + LIMITER POLICY: +# - Trigger: pull_request (includes synchronize on new commits) +# - Limiter: PR-scoped concurrency + cancel-in-progress +# - Intent: latest-commit-wins on rapid push bursts +# +# FIXME(external): Recent zero-step Ruff failures were caused by GitHub account +# billing or spending-limit blocks before the runner executed any step. When +# the job fails with no steps, check billing before debugging Ruff or this YAML. +# +# ============================================================================= + +name: Ruff + +on: + pull_request: + branches: [main] + +concurrency: + group: ruff-pr-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + # =========================================================================== + # Check job: runs on every PR push + # =========================================================================== + check: + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: write + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: false + + - name: Install ruff + run: pip install "ruff==0.15.2" + + # --- Format Check (PR-changed files only, delta-based) --- + - name: Run ruff format + id: ruff_format + continue-on-error: true + run: | + # shellcheck disable=SC2086,SC2129 + mkdir -p .ruff-stats + + # Get Python files changed in this PR + PR_FILES=$(git diff --name-only origin/main...HEAD | grep '\.py$' || true) + FILE_COUNT=$(echo "$PR_FILES" | grep -c . || echo "0") + + if [ -z "$PR_FILES" ]; then + echo "No Python files changed in PR" + echo "format_files_checked=0" >> "$GITHUB_OUTPUT" + echo "format_new=0" >> "$GITHUB_OUTPUT" + echo "format_fixed=0" >> "$GITHUB_OUTPUT" + echo "format_inherited=0" >> "$GITHUB_OUTPUT" + : > .ruff-stats/format_files.txt + exit 0 + fi + + echo "format_files_checked=$FILE_COUNT" >> "$GITHUB_OUTPUT" + + # Check formatting on PR branch + # shellcheck disable=SC2086 + ruff format --check $PR_FILES 2>&1 | grep "^Would reformat:" | \ + sed 's/^Would reformat: //' | sort > .ruff-stats/format_pr.txt || true + + # Check formatting on main branch (same files) + git stash --include-untracked --quiet || true + git checkout origin/main --quiet + + MAIN_PR_FILES="" + for file in $PR_FILES; do + [ -f "$file" ] && MAIN_PR_FILES="$MAIN_PR_FILES $file" + done + + if [ -n "$MAIN_PR_FILES" ]; then + # shellcheck disable=SC2086 + ruff format --check $MAIN_PR_FILES 2>&1 | grep "^Would reformat:" | \ + sed 's/^Would reformat: //' | sort > .ruff-stats/format_main.txt || true + else + # No files to check on main (e.g., all new files), so create empty format_main.txt + : > .ruff-stats/format_main.txt + fi + + git checkout - --quiet + git stash pop --quiet 2>/dev/null || true + + # Calculate deltas + # New: in PR but not in main (PR introduced bad formatting) + comm -23 .ruff-stats/format_pr.txt .ruff-stats/format_main.txt > .ruff-stats/format_new.txt + # Fixed: in main but not in PR (PR fixed formatting) + comm -13 .ruff-stats/format_pr.txt .ruff-stats/format_main.txt > .ruff-stats/format_fixed.txt + # Inherited: in both (pre-existing, not PR author's fault) + comm -12 .ruff-stats/format_pr.txt .ruff-stats/format_main.txt > .ruff-stats/format_inherited.txt + + FORMAT_NEW=$(wc -l < .ruff-stats/format_new.txt | tr -d ' ') + FORMAT_FIXED=$(wc -l < .ruff-stats/format_fixed.txt | tr -d ' ') + FORMAT_INHERITED=$(wc -l < .ruff-stats/format_inherited.txt | tr -d ' ') + + echo "format_new=$FORMAT_NEW" >> "$GITHUB_OUTPUT" + echo "format_fixed=$FORMAT_FIXED" >> "$GITHUB_OUTPUT" + echo "format_inherited=$FORMAT_INHERITED" >> "$GITHUB_OUTPUT" + + # Copy new files list for display (only show files that are PR author's responsibility) + cp .ruff-stats/format_new.txt .ruff-stats/format_files.txt + + # --- Collect Statistics --- + - name: Collect ruff statistics + id: stats + run: | + # shellcheck disable=SC2086,SC2129 + # Unset RUFF_OUTPUT_FORMAT to ensure --output-format flag works + # (ruff-action sets this to "github" which persists) + unset RUFF_OUTPUT_FORMAT + + # Run collection script + .github/scripts/ruff/collect-stats.sh + + # Export stats to GitHub outputs + source .ruff-stats/stats.env + echo "pr_total=$PR_TOTAL" >> "$GITHUB_OUTPUT" + echo "pr_fixable=$PR_FIXABLE" >> "$GITHUB_OUTPUT" + echo "main_total=$MAIN_TOTAL" >> "$GITHUB_OUTPUT" + echo "delta=$DELTA" >> "$GITHUB_OUTPUT" + echo "delta_str=$DELTA_STR" >> "$GITHUB_OUTPUT" + echo "delta_fixable=$DELTA_FIXABLE" >> "$GITHUB_OUTPUT" + echo "delta_fix_pct=$DELTA_FIX_PCT" >> "$GITHUB_OUTPUT" + echo "new_issues=$NEW_ISSUES" >> "$GITHUB_OUTPUT" + echo "fixed_issues=$FIXED_ISSUES" >> "$GITHUB_OUTPUT" + echo "inherited_issues=$INHERITED_ISSUES" >> "$GITHUB_OUTPUT" + echo "has_error_prone=$HAS_ERROR_PRONE" >> "$GITHUB_OUTPUT" + echo "error_prone_rules=$ERROR_PRONE_RULES" >> "$GITHUB_OUTPUT" + + # --- Annotate NEW Issues Only --- + # Generates GitHub annotations filtered to rules with positive deltas + # Skip entirely when no new issues (avoids unnecessary script execution) + - name: Annotate new issues + if: steps.stats.outputs.new_issues > 0 + env: + GITHUB_WORKSPACE: ${{ github.workspace }} + run: .github/scripts/ruff/annotate-new-issues.sh + + # --- Build Tables --- + - name: Build delta tables + env: + GITHUB_REPO: ${{ github.repository }} + GITHUB_SHA: ${{ github.sha }} + GITHUB_WORKSPACE: ${{ github.workspace }} + run: .github/scripts/ruff/build-tables.sh + + # --- Build Comment Body --- + - name: Build comment body + env: + GITHUB_RUN_ID: ${{ github.run_id }} + GITHUB_REPOSITORY: ${{ github.repository }} + run: | + # Pass format stats to build script + export FORMAT_FILES_CHECKED="${{ steps.ruff_format.outputs.format_files_checked }}" + export FORMAT_NEW="${{ steps.ruff_format.outputs.format_new }}" + export FORMAT_FIXED="${{ steps.ruff_format.outputs.format_fixed }}" + export FORMAT_INHERITED="${{ steps.ruff_format.outputs.format_inherited }}" + .github/scripts/ruff/build-comment.sh + + - name: Find existing comment + uses: peter-evans/find-comment@v3 + id: find-comment + with: + issue-number: ${{ github.event.pull_request.number }} + comment-author: 'github-actions[bot]' + body-includes: '' + + - name: Post PR comment + uses: peter-evans/create-or-update-comment@v4 + with: + comment-id: ${{ steps.find-comment.outputs.comment-id }} + issue-number: ${{ github.event.pull_request.number }} + body-path: .ruff-stats/comment_body.md + edit-mode: replace + + # --- Final Status --- + # This step's output is what users see when clicking on a failed check + # So we include full context here, not just in earlier steps + - name: Check for regressions + run: | + echo "=== Ruff Check Summary ===" + echo "" + + # Load all stats for comprehensive output + NEW_ISSUES="${{ steps.stats.outputs.new_issues }}" + FIXED_ISSUES="${{ steps.stats.outputs.fixed_issues }}" + PR_TOTAL="${{ steps.stats.outputs.pr_total }}" + MAIN_TOTAL="${{ steps.stats.outputs.main_total }}" + DELTA="${{ steps.stats.outputs.delta }}" + INHERITED="${{ steps.stats.outputs.inherited_issues }}" + ERROR_PRONE="${{ steps.stats.outputs.error_prone_rules }}" + FORMAT_NEW="${{ steps.ruff_format.outputs.format_new }}" + + # Show comparison + echo " Main branch: $MAIN_TOTAL issues" + echo " PR branch: $PR_TOTAL issues (delta: $DELTA)" + echo "" + echo " 🔴 New: $NEW_ISSUES (you introduced — must fix)" + echo " 🟢 Fixed: $FIXED_ISSUES (you fixed — thank you!)" + echo " ⚪ Inherited: $INHERITED (pre-existing in codebase — not your fault)" + echo "" + + # Show which rules have new issues + if [ "$NEW_ISSUES" -gt 0 ] && [ -f .ruff-stats/rules_with_new_issues.txt ]; then + echo " Rules with new issues:" + cat .ruff-stats/rules_with_new_issues.txt | while read -r rule; do + echo " - $rule" + done + echo "" + fi + + # Show error-prone rules if any + if [ -n "$ERROR_PRONE" ]; then + echo " 🐛 Error-prone rules: $ERROR_PRONE" + echo " (These catch real bugs, not just style issues!)" + echo "" + fi + + # Final verdict + echo "=== Result ===" + if [ "$NEW_ISSUES" -gt 0 ]; then + echo "" + echo " ❌ FAILED: Introduced $NEW_ISSUES new ruff issues" + echo "" + + # Show full ruff output for regressions only + echo "=== Ruff Output (new issues only) ===" + echo "" + if [ -f .ruff-stats/rules_with_new_issues.txt ]; then + PATTERN=$(paste -sd'|' .ruff-stats/rules_with_new_issues.txt) + # Full ruff-style output: file:line:col: CODE message + # Include URL for documentation reference + jq -r --arg pattern "$PATTERN" ' + select(.code | test($pattern)) | + "\(.filename):L\(.location.row):\(.location.column): \(.code) \(.message)" + + (if .url then "\n → \(.url)" else "" end) + ' .ruff-stats/pr_ruff_output.json 2>/dev/null || echo "(could not parse ruff output)" + fi + echo "" + + echo "=== To Fix ===" + echo "" + echo " 1. Run: ruff check --fix ." + echo " 2. Review and commit the changes" + echo " 3. Push to update this PR" + echo "" + echo "::error::Introduced $NEW_ISSUES new ruff issues (fixed $FIXED_ISSUES)" + exit 1 + fi + + if [ "${FORMAT_NEW:-0}" -gt 0 ]; then + echo "" + echo " ❌ FAILED: Introduced $FORMAT_NEW new formatting issue(s)" + echo "" + + # Show which files need formatting + if [ -s .ruff-stats/format_new.txt ]; then + echo " Files needing formatting:" + cat .ruff-stats/format_new.txt | sed 's/^/ /' + echo "" + fi + + echo " To fix:" + echo " 1. Run: ruff format ." + echo " 2. Commit the changes" + echo " 3. Push to update this PR" + echo "::error::Ruff format check failed" + exit 1 + fi + + echo "" + echo " ✅ PASSED: No new issues introduced" + if [ "$FIXED_ISSUES" -gt 0 ]; then + echo " 🎉 Bonus: Fixed $FIXED_ISSUES pre-existing issues!" + fi diff --git a/PySpotObserver/README.md b/PySpotObserver/README.md index 25875db..5ce4d9e 100644 --- a/PySpotObserver/README.md +++ b/PySpotObserver/README.md @@ -191,7 +191,7 @@ The Python implementation differs in these ways: When contributing, please follow these guidelines: -1. Use `black` for code formatting +1. Use `ruff` for code formatting 2. Use `mypy` for type checking 3. Add tests for new features 4. Update documentation diff --git a/PySpotObserver/examples/basic_streaming.py b/PySpotObserver/examples/basic_streaming.py index f15274c..6a844de 100644 --- a/PySpotObserver/examples/basic_streaming.py +++ b/PySpotObserver/examples/basic_streaming.py @@ -11,18 +11,15 @@ import argparse import asyncio +import logging +import time +from collections.abc import Sequence from contextlib import AsyncExitStack, ExitStack from dataclasses import dataclass -import logging from pathlib import Path -import time -from typing import Sequence - import cv2 import numpy as np - -from pyspotobserver import CameraType, SpotConfig, SpotConnection from common_cli import ( add_common_connection_arguments, build_camera_mask, @@ -30,6 +27,7 @@ parse_camera_list, ) +from pyspotobserver import CameraType, SpotConfig, SpotConnection logging.basicConfig( level=logging.INFO, @@ -126,9 +124,7 @@ def parse_args() -> argparse.Namespace: help="Print per-stream timing summary at the end of the run.", ) parser.add_argument( - "--vision-pipeline", - action="store_true", - help="Run the vision pipeline on the Spot outputs" + "--vision-pipeline", action="store_true", help="Run the vision pipeline on the Spot outputs" ) parser.add_argument( "--vision-model-path", @@ -173,7 +169,9 @@ def build_stream_specs(args: argparse.Namespace) -> list[StreamSpec]: return specs -def display_images(window_prefix: str, stream, rgb_images: Sequence[np.ndarray], depth_images: Sequence[np.ndarray]) -> bool: +def display_images( + window_prefix: str, stream, rgb_images: Sequence[np.ndarray], depth_images: Sequence[np.ndarray] +) -> bool: for i, (rgb, depth) in enumerate(zip(rgb_images, depth_images)): camera_name = stream.get_camera_order()[i].name rgb_display = (rgb * 255).astype(np.uint8) @@ -216,7 +214,9 @@ def print_timing_summary( print("Per-robot aggregate throughput:") for robot_label, stats in timing_by_robot.items(): avg_fetch_ms = (stats.fetch_seconds / stats.frames) * 1000.0 if stats.frames else 0.0 - avg_display_ms = (stats.display_seconds / stats.frames) * 1000.0 if stats.frames else 0.0 + avg_display_ms = ( + (stats.display_seconds / stats.frames) * 1000.0 if stats.frames else 0.0 + ) avg_loop_ms = (stats.loop_seconds / stats.frames) * 1000.0 if stats.frames else 0.0 fps = stats.frames / elapsed_seconds if elapsed_seconds > 0 else 0.0 print( @@ -236,7 +236,9 @@ def build_connection_configs(args: argparse.Namespace) -> dict[str, SpotConfig]: return configs -def start_streams(connections: dict[str, SpotConnection], specs: list[StreamSpec]) -> dict[str, object]: +def start_streams( + connections: dict[str, SpotConnection], specs: list[StreamSpec] +) -> dict[str, object]: streams = {} for spec in specs: conn = connections[spec.robot_label] @@ -286,7 +288,9 @@ def run_sync(args: argparse.Namespace, specs: list[StreamSpec]) -> int: display_elapsed = 0.0 if not args.no_display: display_start = time.perf_counter() - should_quit = display_images(spec.display_label, stream, rgb_images, depth_images) + should_quit = display_images( + spec.display_label, stream, rgb_images, depth_images + ) display_elapsed = time.perf_counter() - display_start timing_by_label[spec.label].add( @@ -299,14 +303,18 @@ def run_sync(args: argparse.Namespace, specs: list[StreamSpec]) -> int: logger.info("User requested quit") break finally: - finalize_sync(connections, streams, specs, timing_by_label, args.print_timing, overall_start) + finalize_sync( + connections, streams, specs, timing_by_label, args.print_timing, overall_start + ) return 0 async def fetch_stream_async(stream, timeout: float, vision_pipeline: bool) -> FetchResult: fetch_start = time.perf_counter() - rgb_images, depth_images = await stream.async_get_current_images(timeout=timeout, run_pipeline=vision_pipeline) - + rgb_images, depth_images = await stream.async_get_current_images( + timeout=timeout, run_pipeline=vision_pipeline + ) + return FetchResult( rgb_images=rgb_images, depth_images=depth_images, @@ -332,7 +340,10 @@ async def run_async(args: argparse.Namespace, specs: list[StreamSpec]) -> int: start_time = time.perf_counter() while time.perf_counter() - start_time < args.duration: results = await asyncio.gather( - *(fetch_stream_async(streams[spec.label], args.timeout, args.vision_pipeline) for spec in specs) + *( + fetch_stream_async(streams[spec.label], args.timeout, args.vision_pipeline) + for spec in specs + ) ) should_quit = False @@ -340,12 +351,15 @@ async def run_async(args: argparse.Namespace, specs: list[StreamSpec]) -> int: display_elapsed = 0.0 if not args.no_display: display_start = time.perf_counter() - should_quit = display_images( - spec.display_label, - streams[spec.label], - result.rgb_images, - result.depth_images, - ) or should_quit + should_quit = ( + display_images( + spec.display_label, + streams[spec.label], + result.rgb_images, + result.depth_images, + ) + or should_quit + ) display_elapsed = time.perf_counter() - display_start timing_by_label[spec.label].add( @@ -360,7 +374,9 @@ async def run_async(args: argparse.Namespace, specs: list[StreamSpec]) -> int: await asyncio.sleep(0.01) finally: - finalize_async(connections, streams, specs, timing_by_label, args.print_timing, overall_start) + finalize_async( + connections, streams, specs, timing_by_label, args.print_timing, overall_start + ) return 0 @@ -385,7 +401,9 @@ def finalize_sync( logger.info("%s robot active streams: %s", label, conn.list_streams()) logger.info("Disconnected from robot(s)") if print_timing: - print_timing_summary(timing_by_label, specs, connections, time.perf_counter() - overall_start) + print_timing_summary( + timing_by_label, specs, connections, time.perf_counter() - overall_start + ) return 0 @@ -410,7 +428,9 @@ def finalize_async( logger.info("%s robot active streams: %s", label, conn.list_streams()) logger.info("Disconnected from robot(s)") if print_timing: - print_timing_summary(timing_by_label, specs, connections, time.perf_counter() - overall_start) + print_timing_summary( + timing_by_label, specs, connections, time.perf_counter() - overall_start + ) return 0 diff --git a/PySpotObserver/examples/benchmark_allocation.py b/PySpotObserver/examples/benchmark_allocation.py index 8583da3..1e4d1ab 100644 --- a/PySpotObserver/examples/benchmark_allocation.py +++ b/PySpotObserver/examples/benchmark_allocation.py @@ -73,7 +73,9 @@ def _resolve_camera_pairs( for cam in cameras: rgb = CameraType.get_source_name(cam, depth=False) depth = CameraType.get_source_name(cam, depth=True) - if available_sources is not None and (rgb not in available_sources or depth not in available_sources): + if available_sources is not None and ( + rgb not in available_sources or depth not in available_sources + ): skipped.append((cam, rgb, depth)) continue pairs.append((cam, rgb, depth)) @@ -135,7 +137,9 @@ def main() -> int: source_protos = conn.image_client.list_image_sources() available_sources = {src.name for src in source_protos} except Exception as exc: - print(f"Warning: failed to list image sources ({exc}). Proceeding without pre-validation.") + print( + f"Warning: failed to list image sources ({exc}). Proceeding without pre-validation." + ) pairs = _resolve_camera_pairs(requested_cameras, available_sources) requests = _build_requests(pairs) @@ -161,8 +165,13 @@ def main() -> int: decoded_first_pass.append(_convert_alloc(responses[i * 2], is_depth=False)) decoded_first_pass.append(_convert_alloc(responses[i * 2 + 1], is_depth=True)) - rgb_buffers = [np.zeros(decoded_first_pass[i * 2].shape, dtype=np.float32) for i in range(len(pairs))] - depth_buffers = [np.zeros(decoded_first_pass[i * 2 + 1].shape, dtype=np.float32) for i in range(len(pairs))] + rgb_buffers = [ + np.zeros(decoded_first_pass[i * 2].shape, dtype=np.float32) for i in range(len(pairs)) + ] + depth_buffers = [ + np.zeros(decoded_first_pass[i * 2 + 1].shape, dtype=np.float32) + for i in range(len(pairs)) + ] def inplace_once() -> None: for i in range(len(pairs)): @@ -190,7 +199,9 @@ def alloc_once() -> None: per_iter_inplace_ms = (t_inplace / args.iters) * 1000.0 per_iter_alloc_ms = (t_alloc / args.iters) * 1000.0 - speedup = (per_iter_alloc_ms / per_iter_inplace_ms) if per_iter_inplace_ms > 0 else float("inf") + speedup = ( + (per_iter_alloc_ms / per_iter_inplace_ms) if per_iter_inplace_ms > 0 else float("inf") + ) print("Benchmark results (single captured response set):") print("Cameras used:", ", ".join(cam.name for cam, _, _ in pairs)) diff --git a/PySpotObserver/examples/common_cli.py b/PySpotObserver/examples/common_cli.py index 509e9c5..65803b1 100644 --- a/PySpotObserver/examples/common_cli.py +++ b/PySpotObserver/examples/common_cli.py @@ -64,10 +64,7 @@ def add_stream_arguments( parser.add_argument( "--cameras", default=default_cameras, - help=( - "Comma-separated camera list. " - f"Choices: {', '.join(sorted(CAMERA_NAME_MAP))}." - ), + help=(f"Comma-separated camera list. Choices: {', '.join(sorted(CAMERA_NAME_MAP))}."), ) parser.add_argument( "--duration", diff --git a/PySpotObserver/pyspotobserver/camera_stream.py b/PySpotObserver/pyspotobserver/camera_stream.py index bcf4531..ae464de 100644 --- a/PySpotObserver/pyspotobserver/camera_stream.py +++ b/PySpotObserver/pyspotobserver/camera_stream.py @@ -34,6 +34,7 @@ class ImageFrame: timestamp: Time when frame was captured (monotonic clock) acquisition_time: Robot's acquisition time from image response """ + rgb_images: List[np.ndarray] depth_images: List[np.ndarray] camera_order: List[CameraType] @@ -43,6 +44,7 @@ class ImageFrame: class SpotCamStreamError(Exception): """Base exception for SpotCamStream errors.""" + pass @@ -104,9 +106,13 @@ def __init__( # Color correction matrices (None if robot IP is not recognized) self._ccms: Optional[dict] = _ROBOT_CCMS.get(config.robot_ip) if self._ccms is not None: - logger.info(f"SpotCamStream '{stream_id}': Color correction enabled for {config.robot_ip}") + logger.info( + f"SpotCamStream '{stream_id}': Color correction enabled for {config.robot_ip}" + ) else: - logger.info(f"SpotCamStream '{stream_id}': No color correction (unrecognized IP {config.robot_ip})") + logger.info( + f"SpotCamStream '{stream_id}': No color correction (unrecognized IP {config.robot_ip})" + ) # Statistics self._frame_count = 0 @@ -155,9 +161,7 @@ def start_streaming(self, camera_mask: int) -> None: if camera_mask == 0: raise SpotCamStreamError("Camera mask cannot be zero") if camera_mask & ~self._valid_camera_mask(): - raise SpotCamStreamError( - f"Camera mask contains unknown bits: {camera_mask:#x}" - ) + raise SpotCamStreamError(f"Camera mask contains unknown bits: {camera_mask:#x}") # Parse camera mask to get ordered list of cameras self._camera_mask = camera_mask @@ -275,9 +279,7 @@ def get_current_images( if deadline is not None: remaining = deadline - time.monotonic() if remaining <= 0: - raise SpotCamStreamError( - f"Timeout waiting for images (timeout={timeout}s)" - ) + raise SpotCamStreamError(f"Timeout waiting for images (timeout={timeout}s)") wait_timeout = min(wait_timeout, remaining) try: @@ -322,9 +324,7 @@ async def async_get_current_images( loop = asyncio.get_event_loop() if not run_pipeline: - return await loop.run_in_executor( - None, self.get_current_images, timeout, copy, False - ) + return await loop.run_in_executor(None, self.get_current_images, timeout, copy, False) # Run full pipeline in executor def _get_and_process(): @@ -451,7 +451,7 @@ def _build_image_requests(self) -> List[image_pb2.ImageRequest]: rgb_source, quality_percent=self._config.image_quality_percent, image_format=image_pb2.Image.FORMAT_JPEG, - pixel_format=image_pb2.Image.PIXEL_FORMAT_RGB_U8 + pixel_format=image_pb2.Image.PIXEL_FORMAT_RGB_U8, ) ) @@ -507,12 +507,8 @@ def _initialize_frame_pool( self._frame_pool_index = 0 for _ in range(pool_size): - rgb_images = [ - np.zeros(shape, dtype=np.float32) for shape in rgb_shapes - ] - depth_images = [ - np.zeros(shape, dtype=np.float32) for shape in depth_shapes - ] + rgb_images = [np.zeros(shape, dtype=np.float32) for shape in rgb_shapes] + depth_images = [np.zeros(shape, dtype=np.float32) for shape in depth_shapes] self._frame_pool.append( ImageFrame( rgb_images=rgb_images, @@ -547,9 +543,7 @@ def _enqueue_frame(self, frame: ImageFrame) -> None: try: self._image_queue.put(frame, timeout=0.001) except Exception as e: - logger.warning( - f"Stream '{self._stream_id}': Failed to enqueue frame: {e}" - ) + logger.warning(f"Stream '{self._stream_id}': Failed to enqueue frame: {e}") def _peek_latest_frame(self) -> Optional[ImageFrame]: """ @@ -572,9 +566,7 @@ def _fill_frame_from_responses( expected_count = n_cameras * 2 # RGB + depth per camera if len(responses) != expected_count: - raise SpotCamStreamError( - f"Expected {expected_count} responses, got {len(responses)}" - ) + raise SpotCamStreamError(f"Expected {expected_count} responses, got {len(responses)}") for i in range(n_cameras): rgb_idx = i * 2 @@ -584,12 +576,9 @@ def _fill_frame_from_responses( if self._ccms is not None: ccm = self._ccms[self._camera_order[i]] self._convert_image_response_inplace( - responses[rgb_idx], - is_depth=False, - out_array=frame.rgb_images[i], - ccm=ccm + responses[rgb_idx], is_depth=False, out_array=frame.rgb_images[i], ccm=ccm ) - + self._convert_image_response_inplace( responses[depth_idx], is_depth=True, @@ -642,9 +631,7 @@ def _decode_initial_responses( expected_count = n_cameras * 2 # RGB + depth per camera if len(responses) != expected_count: - raise SpotCamStreamError( - f"Expected {expected_count} responses, got {len(responses)}" - ) + raise SpotCamStreamError(f"Expected {expected_count} responses, got {len(responses)}") decoded: List[np.ndarray] = [] for i in range(n_cameras): @@ -682,12 +669,12 @@ def _convert_image_response_alloc( if is_depth: if pixel_format != image_pb2.Image.PIXEL_FORMAT_DEPTH_U16: - raise SpotCamStreamError( - f"Unexpected depth pixel format: {pixel_format}" - ) + raise SpotCamStreamError(f"Unexpected depth pixel format: {pixel_format}") img_data = np.frombuffer(image_proto.data, dtype=np.uint16) img = img_data.reshape((rows, cols)) - depth_scale = response.source.depth_scale if response.source.depth_scale > 0 else 1.0 + depth_scale = ( + response.source.depth_scale if response.source.depth_scale > 0 else 1.0 + ) return img.astype(np.float32) / depth_scale if pixel_format == image_pb2.Image.PIXEL_FORMAT_RGB_U8: @@ -706,20 +693,16 @@ def _convert_image_response_alloc( img = np.stack([img] * 3, axis=-1) return img.astype(np.float32) / 255.0 - raise SpotCamStreamError( - f"Unsupported pixel format: {pixel_format}" - ) + raise SpotCamStreamError(f"Unsupported pixel format: {pixel_format}") - raise SpotCamStreamError( - f"Unsupported image format: {image_proto.format}" - ) + raise SpotCamStreamError(f"Unsupported image format: {image_proto.format}") def _convert_image_response_inplace( self, response: image_pb2.ImageResponse, is_depth: bool, out_array: np.ndarray, - ccm: Optional[np.ndarray] = None + ccm: Optional[np.ndarray] = None, ) -> None: """ Convert ImageResponse protobuf to numpy array in-place. @@ -768,9 +751,7 @@ def _convert_image_response_inplace( if is_depth: # Depth image (16-bit unsigned) if pixel_format != image_pb2.Image.PIXEL_FORMAT_DEPTH_U16: - raise SpotCamStreamError( - f"Unexpected depth pixel format: {pixel_format}" - ) + raise SpotCamStreamError(f"Unexpected depth pixel format: {pixel_format}") # Parse as uint16 img_data = np.frombuffer(image_proto.data, dtype=np.uint16) @@ -782,7 +763,9 @@ def _convert_image_response_inplace( ) # Convert to meters using depth scale - depth_scale = response.source.depth_scale if response.source.depth_scale > 0 else 1.0 + depth_scale = ( + response.source.depth_scale if response.source.depth_scale > 0 else 1.0 + ) np.multiply(img, 1.0 / depth_scale, out=out_array, casting="unsafe") return @@ -842,14 +825,10 @@ def _convert_image_response_inplace( return else: - raise SpotCamStreamError( - f"Unsupported pixel format: {pixel_format}" - ) + raise SpotCamStreamError(f"Unsupported pixel format: {pixel_format}") else: - raise SpotCamStreamError( - f"Unsupported image format: {image_proto.format}" - ) + raise SpotCamStreamError(f"Unsupported image format: {image_proto.format}") def _get_ccm_scratch(self, shape: Tuple[int, ...]) -> np.ndarray: scratch = self._ccm_scratch_by_shape.get(shape) diff --git a/PySpotObserver/pyspotobserver/color_correction.py b/PySpotObserver/pyspotobserver/color_correction.py index 7e16b20..564a888 100644 --- a/PySpotObserver/pyspotobserver/color_correction.py +++ b/PySpotObserver/pyspotobserver/color_correction.py @@ -12,51 +12,75 @@ # applied in numpy as: img @ M.T for (H, W, 3) images. # BACK and HAND cameras use identity until calibration data is available. _GOUGER_CCMS: dict = { - CameraType.LEFT: np.array([ - [ 1.4980662, -0.0707718, -0.0458571], - [-0.4865953, 0.8866953, -0.4204259], - [ 0.5937464, 0.3074408, 1.1898727], - ], dtype=np.float32), - CameraType.RIGHT: np.array([ - [ 2.1307423, 0.1372727, 0.0647798], - [ 0.0568109, 1.8951481, -0.4296177], - [ 0.5981083, 0.3732290, 2.0813310], - ], dtype=np.float32), - CameraType.FRONTLEFT: np.array([ - [ 1.4337976, -0.1356816, -0.1389128], - [-0.3111499, 0.9810063, -0.2869629], - [ 0.3030030, 0.0814753, 0.9887888], - ], dtype=np.float32), - CameraType.FRONTRIGHT: np.array([ - [ 2.9136648, -0.0958534, -0.0561581], - [-0.5915730, 1.7521565, -0.7878229], - [ 0.6022132, 0.1194339, 1.6545299], - ], dtype=np.float32), + CameraType.LEFT: np.array( + [ + [1.4980662, -0.0707718, -0.0458571], + [-0.4865953, 0.8866953, -0.4204259], + [0.5937464, 0.3074408, 1.1898727], + ], + dtype=np.float32, + ), + CameraType.RIGHT: np.array( + [ + [2.1307423, 0.1372727, 0.0647798], + [0.0568109, 1.8951481, -0.4296177], + [0.5981083, 0.3732290, 2.0813310], + ], + dtype=np.float32, + ), + CameraType.FRONTLEFT: np.array( + [ + [1.4337976, -0.1356816, -0.1389128], + [-0.3111499, 0.9810063, -0.2869629], + [0.3030030, 0.0814753, 0.9887888], + ], + dtype=np.float32, + ), + CameraType.FRONTRIGHT: np.array( + [ + [2.9136648, -0.0958534, -0.0561581], + [-0.5915730, 1.7521565, -0.7878229], + [0.6022132, 0.1194339, 1.6545299], + ], + dtype=np.float32, + ), CameraType.BACK: _IDENTITY_3x3, CameraType.HAND: _IDENTITY_3x3, } _TUSKER_CCMS: dict = { - CameraType.LEFT: np.array([ - [ 1.8952084, -0.0917113, -0.1345257], - [-0.2042349, 1.4999812, -0.4616307], - [ 0.3097906, 0.0590297, 1.4913727], - ], dtype=np.float32), - CameraType.RIGHT: np.array([ - [ 1.5771384, -0.0511120, -0.0410739], - [-0.6573969, 0.9523082, -0.5956711], - [ 0.3767823, 0.3439774, 1.8481518], - ], dtype=np.float32), - CameraType.FRONTLEFT: np.array([ - [ 2.1959698, 0.0050106, -0.0217284], - [-0.3654362, 1.3668996, -0.4870523], - [ 0.5744365, 0.2933731, 1.5111261], - ], dtype=np.float32), - CameraType.FRONTRIGHT: np.array([ - [ 3.1622671, -0.1799195, -0.1370601], - [-0.8215924, 2.0136070, -0.9268775], - [ 0.4902452, -0.0647052, 2.0256366], - ], dtype=np.float32), + CameraType.LEFT: np.array( + [ + [1.8952084, -0.0917113, -0.1345257], + [-0.2042349, 1.4999812, -0.4616307], + [0.3097906, 0.0590297, 1.4913727], + ], + dtype=np.float32, + ), + CameraType.RIGHT: np.array( + [ + [1.5771384, -0.0511120, -0.0410739], + [-0.6573969, 0.9523082, -0.5956711], + [0.3767823, 0.3439774, 1.8481518], + ], + dtype=np.float32, + ), + CameraType.FRONTLEFT: np.array( + [ + [2.1959698, 0.0050106, -0.0217284], + [-0.3654362, 1.3668996, -0.4870523], + [0.5744365, 0.2933731, 1.5111261], + ], + dtype=np.float32, + ), + CameraType.FRONTRIGHT: np.array( + [ + [3.1622671, -0.1799195, -0.1370601], + [-0.8215924, 2.0136070, -0.9268775], + [0.4902452, -0.0647052, 2.0256366], + ], + dtype=np.float32, + ), CameraType.BACK: _IDENTITY_3x3, CameraType.HAND: _IDENTITY_3x3, } @@ -64,4 +88,4 @@ _ROBOT_CCMS: dict = { _GOUGER_IP: _GOUGER_CCMS, _TUSKER_IP: _TUSKER_CCMS, -} \ No newline at end of file +} diff --git a/PySpotObserver/pyspotobserver/config.py b/PySpotObserver/pyspotobserver/config.py index 84142ee..68befcd 100644 --- a/PySpotObserver/pyspotobserver/config.py +++ b/PySpotObserver/pyspotobserver/config.py @@ -14,6 +14,7 @@ class CameraType(IntFlag): Camera types available on Spot robot. Uses IntFlag to support bitwise operations for camera masks. """ + BACK = 0x1 FRONTLEFT = 0x2 FRONTRIGHT = 0x4 @@ -55,6 +56,7 @@ class SpotConfig: Can be loaded from YAML file or instantiated directly. """ + # Connection settings robot_ip: str username: str = "" @@ -112,7 +114,7 @@ def from_yaml(cls, yaml_path: Union[Path, str]) -> "SpotConfig": if not path.exists(): raise FileNotFoundError(f"Config file not found: {path}") - with open(path, 'r') as f: + with open(path, "r") as f: data = yaml.safe_load(f) if data is None: @@ -132,25 +134,25 @@ def to_yaml(self, yaml_path: Union[Path, str]) -> None: # Convert to dict, excluding extra_params if empty data = { - 'robot_ip': self.robot_ip, - 'username': self.username, - 'password': self.password, - 'image_buffer_size': self.image_buffer_size, - 'image_quality_percent': self.image_quality_percent, - 'request_timeout_seconds': self.request_timeout_seconds, - 'vision_model_path': self.vision_model_path, - 'vision_providers': self.vision_providers, - 'sdk_name': self.sdk_name, - 'connection_retry_attempts': self.connection_retry_attempts, - 'connection_retry_delay_ms': self.connection_retry_delay_ms, + "robot_ip": self.robot_ip, + "username": self.username, + "password": self.password, + "image_buffer_size": self.image_buffer_size, + "image_quality_percent": self.image_quality_percent, + "request_timeout_seconds": self.request_timeout_seconds, + "vision_model_path": self.vision_model_path, + "vision_providers": self.vision_providers, + "sdk_name": self.sdk_name, + "connection_retry_attempts": self.connection_retry_attempts, + "connection_retry_delay_ms": self.connection_retry_delay_ms, } data = {key: value for key, value in data.items() if value is not None} if self.extra_params: - data['extra_params'] = self.extra_params + data["extra_params"] = self.extra_params - with open(path, 'w') as f: + with open(path, "w") as f: yaml.dump(data, f, default_flow_style=False, sort_keys=False) def __repr__(self) -> str: diff --git a/PySpotObserver/pyspotobserver/connection.py b/PySpotObserver/pyspotobserver/connection.py index 55048f0..abe48d9 100644 --- a/PySpotObserver/pyspotobserver/connection.py +++ b/PySpotObserver/pyspotobserver/connection.py @@ -20,11 +20,13 @@ class SpotConnectionError(Exception): """Base exception for SpotConnection errors.""" + pass class SpotAuthenticationError(SpotConnectionError): """Raised when authentication fails.""" + pass @@ -294,9 +296,7 @@ def create_cam_stream( logger.info(f"Created camera stream: {stream_id}") if auto_start_mask is not None: - logger.info( - f"Auto-starting stream '{stream_id}' with mask: {auto_start_mask}" - ) + logger.info(f"Auto-starting stream '{stream_id}' with mask: {auto_start_mask}") try: stream.start_streaming(auto_start_mask) except Exception: diff --git a/PySpotObserver/pyspotobserver/vision_pipeline.py b/PySpotObserver/pyspotobserver/vision_pipeline.py index 8bbdd26..28428d2 100644 --- a/PySpotObserver/pyspotobserver/vision_pipeline.py +++ b/PySpotObserver/pyspotobserver/vision_pipeline.py @@ -211,10 +211,7 @@ def _ensure_buffers( self._depth_buffer = np.empty(depth_shape, dtype=self._depth_dtype) if self._depth_dtype == np.dtype(np.float16): - if ( - self._depth_resize_buffer is None - or self._depth_resize_buffer.shape != depth_shape - ): + if self._depth_resize_buffer is None or self._depth_resize_buffer.shape != depth_shape: self._depth_resize_buffer = np.empty(depth_shape, dtype=np.float32) else: self._depth_resize_buffer = None @@ -279,8 +276,7 @@ def run_vision_pipeline( resolved_model_path = model_path or os.environ.get(DEFAULT_MODEL_ENV_VAR) if not resolved_model_path: raise VisionPipelineError( - "Vision model path is required. Pass model_path or set " - f"{DEFAULT_MODEL_ENV_VAR}." + f"Vision model path is required. Pass model_path or set {DEFAULT_MODEL_ENV_VAR}." ) provider_tuple = _normalize_providers(providers) diff --git a/PySpotObserver/requirements.txt b/PySpotObserver/requirements.txt index feef701..437fbda 100644 --- a/PySpotObserver/requirements.txt +++ b/PySpotObserver/requirements.txt @@ -5,3 +5,10 @@ # Vision pipeline support is optional because it requires ONNX Runtime. # Install it when needed with: # pip install -e ".[vision]" + +# Optional dependencies +# For development +pytest>=7.4.0 # Testing framework +pytest-asyncio>=0.21.0 # Async test support +ruff>=0.15.2 # Code formatting and linting +mypy>=1.5.0 # Type checking diff --git a/PySpotObserver/setup.py b/PySpotObserver/setup.py index 511ef1e..92da461 100644 --- a/PySpotObserver/setup.py +++ b/PySpotObserver/setup.py @@ -15,7 +15,7 @@ DEV_REQUIRES = [ "pytest>=7.4.0", "pytest-asyncio>=0.21.0", - "black>=23.0.0", + "ruff>=0.15.2", "mypy>=1.5.0", ] diff --git a/PySpotObserver/tests/test_config.py b/PySpotObserver/tests/test_config.py index 4842d5f..dc94168 100644 --- a/PySpotObserver/tests/test_config.py +++ b/PySpotObserver/tests/test_config.py @@ -28,12 +28,18 @@ def test_get_source_name_rgb(self): def test_get_source_name_depth(self): """Test depth camera source name generation.""" - assert CameraType.get_source_name(CameraType.FRONTLEFT, depth=True) == \ - "frontleft_depth_in_visual_frame" - assert CameraType.get_source_name(CameraType.RIGHT, depth=True) == \ - "right_depth_in_visual_frame" - assert CameraType.get_source_name(CameraType.HAND, depth=True) == \ - "hand_depth_in_hand_color_frame" + assert ( + CameraType.get_source_name(CameraType.FRONTLEFT, depth=True) + == "frontleft_depth_in_visual_frame" + ) + assert ( + CameraType.get_source_name(CameraType.RIGHT, depth=True) + == "right_depth_in_visual_frame" + ) + assert ( + CameraType.get_source_name(CameraType.HAND, depth=True) + == "hand_depth_in_hand_color_frame" + ) class TestSpotConfig: @@ -115,8 +121,7 @@ def test_yaml_file_not_found(self): def test_extra_params(self): """Test extra_params field.""" config = SpotConfig( - robot_ip="192.168.80.3", - extra_params={"location": "lab", "experiment": "test1"} + robot_ip="192.168.80.3", extra_params={"location": "lab", "experiment": "test1"} ) assert config.extra_params["location"] == "lab" assert config.extra_params["experiment"] == "test1" diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 0000000..c4cdae9 --- /dev/null +++ b/ruff.toml @@ -0,0 +1,124 @@ +# ============================================================================= +# Ruff Configuration +# ============================================================================= +# Ruff is an extremely fast Python linter and formatter, written in Rust. +# Docs: https://docs.astral.sh/ruff/configuration/ +# +# ============================================================================= + +target-version = "py312" +line-length = 100 + +# ============================================================================= +# EXCLUSIONS +# ============================================================================= +# Directories/files to exclude from linting and formatting. +# +# Standard exclusions: +# __pycache__ - Python bytecode cache +# .git - Git internals +# .venv - Virtual environment +# experiments - Experiment outputs (logs, checkpoints, etc.) +# +# Project-specific: +# src/archive - Archived/deprecated code (kept for reference) +# +# ============================================================================= + +exclude = [ + "__pycache__", + ".git", + ".venv", + "experiments", + "src/archive", +] + +# ============================================================================= +# LINT RULES +# ============================================================================= +# Rule selection philosophy: Start strict, add ignores as needed. +# Better to have CI catch potential issues than miss them. +# +# Rule categories enabled: +# STARTER (safe defaults): +# F - Pyflakes: undefined names, unused imports, shadowing +# E/W - pycodestyle: PEP8 style violations (errors/warnings) +# I - isort: import sorting and grouping +# +# RECOMMENDED (quality): +# B - flake8-bugbear: common bugs, anti-patterns, footguns +# UP - pyupgrade: automatic Python version upgrades +# SIM - flake8-simplify: simplifiable constructs +# RUF - Ruff-specific: Ruff's own high-quality rules +# +# RESEARCH/ML (domain-specific): +# NPY - NumPy: NumPy-specific rules (deprecated APIs, etc.) +# PD - pandas-vet: pandas-specific rules (chained assignment, etc.) +# ANN - flake8-annotations: type annotation issues +# Note: This project uses typeguard, so should have good coverage. +# If too noisy, add specific ANN codes to per-file-ignores. +# +# ============================================================================= + +[lint] +select = [ + # --- Starter (safe defaults) --- + "F", # Pyflakes: undefined names, unused imports + "E", "W", # pycodestyle: PEP8 errors (E) and warnings (W) + "I", # isort: import sorting + + # --- Recommended (quality) --- + "B", # flake8-bugbear: common bugs and anti-patterns + "UP", # pyupgrade: use modern Python syntax + "SIM", # flake8-simplify: simplifiable constructs + "RUF", # Ruff-specific: high-quality Ruff rules + + # --- Research/ML (domain-specific) --- + "NPY", # NumPy-specific rules + "PD", # pandas-vet + "ANN", # flake8-annotations: type hints +] + +ignore = [ + "E501", # Line too long: Formatter handles this; remaining cases are acceptable + "W292", # No newline at end of file: Formatter handles this automatically + "RUF001", # Ambiguous Unicode: Greek letters (σ, α) and math symbols (×) are intentional in scientific code +] + +# ============================================================================= +# PER-FILE IGNORES +# ============================================================================= +# Add specific rule ignores for files that have legitimate reasons. +# Format: "path/pattern" = ["CODE1", "CODE2"] +# ============================================================================= + +[lint.per-file-ignores] +# Add file-specific ignores here as needed + +# ============================================================================= +# ISORT CONFIGURATION +# ============================================================================= +# isort handles import sorting and grouping. +# known-first-party: Packages that are part of this project (not third-party). +# ============================================================================= + +# ============================================================================= +# FLAKE8-ANNOTATIONS CONFIGURATION +# ============================================================================= +# allow-star-arg-any: Suppress ANN401 for *args/**kwargs with Any type. +# Rationale: Pass-through kwargs (e.g., to matplotlib) are impractical to type +# precisely. Using Any is standard practice for wrapper functions. +# ============================================================================= + +[lint.flake8-annotations] +allow-star-arg-any = true + +# ============================================================================= +# FORMAT CONFIGURATION +# ============================================================================= +# Ruff format is a drop-in replacement for Black. +# docstring-code-format: Format code examples in docstrings. +# ============================================================================= + +[format] +docstring-code-format = true \ No newline at end of file