Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
f83a921
feat: achieve 100% nf-core compliance across all 4 WASP2 pipelines
Jaureguy760 Mar 6, 2026
c31bddf
fix(nf-scatac): resolve ${projectDir} in samplesheet CSV paths
Jaureguy760 Mar 6, 2026
18a6a9c
fix(nf-atacseq): add BWA_INDEX to container override selector
Jaureguy760 Mar 6, 2026
8e6231a
fix(nf-outrider): fix 6 OUTRIDER API bugs in R script and subworkflow
Jaureguy760 Mar 6, 2026
6646e56
fix(counting): guard against empty CSV from bedtools intersect
Jaureguy760 Mar 6, 2026
ee8cc6d
fix(tests): replace repetitive reference with realistic genome
Jaureguy760 Mar 6, 2026
186ecf3
feat(nf-rnaseq): add ARM/Apple Silicon compatibility profile
Jaureguy760 Mar 6, 2026
15e24b5
fix(wasp): fix total_seqs mismatch discarding all het-variant reads
Jaureguy760 Mar 6, 2026
0239316
fix(nf-atacseq): regenerate test data with realistic reference
Jaureguy760 Mar 6, 2026
43f01cb
docs(nf-rnaseq): add ARM architecture warning to test data generator
Jaureguy760 Mar 6, 2026
e4f4e7a
fix(pipelines): remove global process.conda override and fix module e…
Jaureguy760 Mar 6, 2026
cb306e9
fix(pipelines): resolve all nf-core compliance gaps across 4 pipelines
Jaureguy760 Mar 6, 2026
b8d5099
fix(pipelines): achieve 100% nf-core compliance across all 4 pipelines
Jaureguy760 Mar 6, 2026
d929e6e
fix(pipelines): achieve nf-core lint compliance across all 4 WASP2 pi…
Jaureguy760 Mar 6, 2026
38bcfe5
Merge feat/nfcore-compliance-full into dev
Jaureguy760 Mar 6, 2026
b19a23c
chore: clean up stray files and harden .gitignore
Jaureguy760 Mar 6, 2026
f9948aa
chore: add AI agent file pollution prevention layers
Jaureguy760 Mar 6, 2026
f752722
feat: add CI lint workflow, improve nf-tests, update pipeline READMEs
Jaureguy760 Mar 6, 2026
a3b7c32
feat: add chr21 1000 Genomes real data download script
Jaureguy760 Mar 6, 2026
197affd
fix: revert GTF from count-variants-sc, harden Dockerfile, fix docs
Jaureguy760 Mar 15, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,8 @@ jobs:

- name: Run cargo test
working-directory: rust
run: cargo test
run: PYO3_PYTHON=$(command -v python3) cargo test

- name: Run clippy
working-directory: rust
run: cargo clippy -- -W warnings
run: PYO3_PYTHON=$(command -v python3) cargo clippy -- -W warnings
93 changes: 93 additions & 0 deletions .github/workflows/nf-lint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# WASP2 Nextflow pipeline lint workflow
# Runs nf-core pipelines lint on all 4 Nextflow pipelines

name: Nextflow Lint

# Run on pushes to, and pull requests targeting, the master and dev branches.
on:
push:
branches: [master, dev]
pull_request:
branches: [master, dev]

# Read-only repository access for the workflow token.
permissions:
contents: read

jobs:
# ===========================================================================
# nf-core pipelines lint (matrix: 4 pipelines)
# ===========================================================================
nf-lint:
name: nf-core lint (${{ matrix.pipeline }})
runs-on: ubuntu-latest
timeout-minutes: 15
strategy:
# Keep linting the remaining pipelines when one matrix entry fails.
fail-fast: false
matrix:
# Each entry is a directory name under pipelines/; the lint step uses it
# as its working directory.
pipeline: [nf-atacseq, nf-rnaseq, nf-scatac, nf-outrider]
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

- name: Set up Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
with:
python-version: "3.12"

# NOTE(review): referenced by mutable tag, unlike the SHA-pinned actions
# above - consider pinning to a commit for consistency.
- name: Install Nextflow
uses: nf-core/setup-nextflow@v2

# NOTE(review): nf-core tools is installed unpinned, so the set of lint
# checks can change between runs - consider pinning a version.
- name: Install nf-core tools
run: pip install nf-core

- name: Run nf-core pipelines lint
  working-directory: pipelines/${{ matrix.pipeline }}
  # The script relies on PIPESTATUS, which is a bash feature.
  shell: bash
  run: |
    # Lint the pipeline and fail the job on any failure other than the one
    # that is expected for a non-nf-core pipeline.
    # Expected: exactly 1 failure for manifest.name not starting with nf-core/
    #
    # The exit status must be taken from nf-core itself. A plain $? after the
    # pipeline reports the status of tee, which hides every lint failure.
    set +e
    nf-core pipelines lint --json lint-results.json 2>&1 | tee lint-output.txt
    LINT_EXIT=${PIPESTATUS[0]}
    set -e

    if [ "$LINT_EXIT" -eq 0 ]; then
      echo "Lint passed with no failures."
      exit 0
    fi

    # Collect every failure except the expected manifest.name one, one per
    # line. The Python source must stay at the base indentation of this
    # script so that it starts in column 0 once YAML strips the block indent.
    # A missing/unreadable JSON file or an empty failure list means the
    # non-zero exit is unexplained, which is treated as an error.
    if ! UNEXPECTED=$(python3 -c "
    import json, sys
    with open('lint-results.json') as fh:
        results = json.load(fh)
    # NOTE(review): nf-core tools is believed to write failures under
    # 'tests_failed'; 'failed' is kept as a fallback - confirm against the
    # installed nf-core version.
    failures = results.get('tests_failed', results.get('failed', []))
    if not failures:
        sys.exit('lint exited non-zero but lint-results.json lists no failures')
    for failure in failures:
        text = str(failure)
        if not ('manifest.name' in text and 'nf-core' in text):
            print(failure)
    "); then
      echo ""
      echo "====================================================================="
      echo "ERROR: lint exited with status $LINT_EXIT and its JSON results could not be evaluated"
      echo "====================================================================="
      exit 1
    fi

    if [ -z "$UNEXPECTED" ]; then
      echo ""
      echo "====================================================================="
      echo "Lint complete. Only expected failure: manifest.name not nf-core/*"
      echo "====================================================================="
      exit 0
    fi

    # UNEXPECTED is non-empty here, one failure per line.
    FAIL_COUNT=$(printf '%s\n' "$UNEXPECTED" | wc -l)
    echo ""
    echo "====================================================================="
    echo "ERROR: $FAIL_COUNT unexpected lint failure(s) detected!"
    echo "====================================================================="
    printf '%s\n' "$UNEXPECTED"
    exit 1
29 changes: 28 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,11 @@ benchmark_figures/
# Sanity test data (downloaded from GitHub releases)
tests/sanity/data/

# Real data test files (downloaded from 1000 Genomes, ~2-3 GB)
tests/real_data/data/
tests/real_data/samplesheets/
tests/real_data/configs/

# Nextflow runtime
.nextflow/
.nextflow.log*
Expand All @@ -175,7 +180,29 @@ test-output/
results_stub/
pipelines/*/test-output/
pipelines/*/results_stub/
pipelines/*/results_*/
pipelines/*/artifacts/

# Artifacts directory
artifacts/

# Benchmark infrastructure (large data/envs/results)
test_benchmarks/

# Claude Code local state
.claude/

# Nextflow pipeline-level logs
pipelines/*/.nextflow.log*
pipelines/*/.nf-test.log

# Nextflow reports and visualizations
trace.txt
timeline.html
report.html
dag.svg
dag.dot

# Claude Code memory files (per-directory): ignore nested copies but keep the
# repository-root CLAUDE.md tracked. Only the "/"-anchored negation is needed;
# a "./" prefix is not gitignore pattern syntax and never matches a path.
**/CLAUDE.md
!/CLAUDE.md
25 changes: 25 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,31 @@ repos:
hooks:
- id: gitleaks

# Block common AI agent artifacts from being committed
# Both hooks use pre-commit's `fail` language: any staged file whose path
# matches `files` fails the hook, and `entry` is printed as the message.
- repo: local
hooks:
- id: forbid-agent-artifacts
name: Block AI agent artifacts
entry: "Forbidden: likely an AI agent artifact"
language: fail
# (?x) enables verbose regex mode so the alternatives can be listed one per
# line. NOTE(review): the leading ^ anchors every alternative to the start of
# the repo-relative path, so names such as SUMMARY.md are only blocked at the
# repository root - confirm that nested copies are meant to be allowed.
files: |
(?x)^(
ANALYSIS\.md|
SUMMARY\.md|
REVIEW\.md|
debug_.*\.py|
test_scratch.*|
tmpclaude.*|
.*\.debug\.(py|js|ts)|
run_debug.*\.py|
-\.bam
)$
- id: forbid-binaries-in-src
# Rejects alignment, variant and read files (matched by extension) anywhere
# under src/ or pipelines/.
name: Block binary files in source directories
entry: "Forbidden: binary file in source directory"
language: fail
files: '^(src|pipelines)/.*\.(bam|bai|vcf\.gz|vcf\.gz\.tbi|sam|fastq\.gz|fq\.gz)$'

# Type checking: basedpyright (stricter Pyright fork)
- repo: local
hooks:
Expand Down
33 changes: 33 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# WASP2 — Project Instructions

## What This Is
WASP2 is a mapping bias correction tool for genomic analyses. It includes:
- A Python/Rust core library (`src/wasp2/`)
- 4 Nextflow pipelines (`pipelines/nf-atacseq`, `nf-rnaseq`, `nf-scatac`, `nf-outrider`)
- Benchmarking infrastructure (`benchmarking/`)

## File Hygiene Rules
- NEVER create files unless absolutely necessary for the task
- NEVER create placeholder/stub files (empty PNGs, dummy data, skeleton configs)
- NEVER create files in the repo root — use appropriate subdirectories
- ALWAYS prefer editing existing files over creating new ones
- ALWAYS clean up temp files created during debugging before finishing
- If you create a test/debug script, delete it when done
- NEVER commit binary files (BAM, BAI, VCF, FASTQ) to git — use test fixtures in `tests/`

## Nextflow Development
- Pipelines follow nf-core conventions (modules, subworkflows, configs)
- Use `nextflow clean -f` after test runs to remove work directories
- Test profiles use small chr21 data — don't download full genomes
- Aligner: BWA (default) or Bowtie2 via `--aligner` parameter

## Code Style
- Python: ruff for linting/formatting, basedpyright for types
- Pre-commit hooks are configured — run `pre-commit install` after cloning
- Nextflow: follow nf-core module patterns (meta map, versions.yml emit)

## Git Workflow
- Feature branches → PR to `dev` → release merges `dev` → `main`
- Never commit directly to `main` or `dev`
- Stage files individually (`git add <file>`) — never `git add .`
- Run `pre-commit run --all-files` before committing
10 changes: 8 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ LABEL maintainer="Jeff Jaureguy <jeffpjaureguy@gmail.com>"
# Install runtime deps + temporary build deps for pybedtools C++ extension
# Combined into one RUN to minimize layers; build tools purged at the end
RUN apt-get update && apt-get install -y --no-install-recommends \
# PID 1 init for proper signal handling (Nextflow/HPC)
tini \
# Bioinformatics tools
samtools \
bcftools \
Expand All @@ -106,14 +108,16 @@ RUN --mount=type=cache,target=/root/.cache/pip \
pip install /tmp/*.whl \
&& rm -rf /tmp/*.whl \
&& apt-get purge -y --auto-remove g++ zlib1g-dev \
&& rm -rf /var/lib/apt/lists/*
&& rm -rf /var/lib/apt/lists/* \
&& ! command -v g++

WORKDIR /app

# Verify non-Python tools are available (Python tools skipped during build
# because Polars uses AVX2 instructions that fail under QEMU emulation
# on ARM64 CI runners building linux/amd64 images)
RUN samtools --version && bcftools --version && bedtools --version
RUN samtools --version && bcftools --version && bedtools --version \
&& wasp2-ipscore --help > /dev/null 2>&1

# Create non-root user for security
RUN groupadd -g 1000 wasp2 && \
Expand Down Expand Up @@ -147,5 +151,7 @@ WORKDIR /data
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
CMD wasp2-count --version || exit 1

ENTRYPOINT ["tini", "--"]

# Default command
CMD ["wasp2-count", "--help"]
6 changes: 5 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

.PHONY: all build install test test-quick test-sanity lint format clean help
.PHONY: download-sanity-data sanity-data-local rust-build rust-test
.PHONY: test-mapping-parity

# Configuration
PYTHON ?= python
Expand Down Expand Up @@ -48,7 +49,7 @@ rust-dev: ## Build Rust extension in debug mode (faster compile)
$(MATURIN) develop -m $(RUST_DIR)/Cargo.toml

rust-test: ## Run Rust unit tests
cd $(RUST_DIR) && $(CARGO) test
cd $(RUST_DIR) && PYO3_PYTHON=$$($(PYTHON) -c "import sys; print(sys.executable)") $(CARGO) test

rust-bench: ## Run Rust benchmarks
cd $(RUST_DIR) && $(CARGO) bench
Expand All @@ -68,6 +69,9 @@ test-quick: ## Run quick validation tests only
test-rust: ## Run Rust-specific tests
$(PYTEST) $(TESTS_DIR) -v --tb=short -m "rust"

test-mapping-parity: ## Run mapping parity tests against legacy and unified paths
$(PYTEST) $(TESTS_DIR)/regression/test_mapping_stage_parity.py -v --tb=short

test-integration: ## Run integration tests
$(PYTEST) $(TESTS_DIR) -v --tb=short -m "integration"

Expand Down
Loading
Loading