diff --git a/modules/data.remote/inst/ccmmf/events/make_events_statewide.R b/modules/data.remote/inst/ccmmf/events/make_events_statewide.R
new file mode 100644
index 00000000000..dd2d8fac372
--- /dev/null
+++ b/modules/data.remote/inst/ccmmf/events/make_events_statewide.R
@@ -0,0 +1,522 @@
+#!/usr/bin/env Rscript
+# Build statewide PEcAn-style event files for one year (Parquet + JSON) from matched LandIQ+MSLSP.
+# Writes phenology, planting, harvest, and optionally tillage event tables under event_files/.
+#
+# Main inputs: CCMMF_MANAGEMENT (paths to matched assigned_year=Y.parquet, scripts/traits pool,
+#   tillage/ndti_v4.1). Optional: HARVEST_LOOKUP_RDS, HARVEST_WOODY_DESTRUCTIVE, TILLAGE_* env.
+# Main outputs: phenology_statewide_Y, planting_statewide_Y, harvest_statewide_Y, tillage_statewide_*.
+# How to run: Rscript make_events_statewide.R <year> [phenology|planting|harvest|tillage]
+# Workflow: end of monitoring chart (statewide outputs). Phenology/planting/harvest read assigned
+#   parquet and pool_calculations_from_lookup.R. Harvest dates: row/rice use mslsp_OGMn; hay/woody
+#   use mslsp_OGD. Tillage uses NDTI plus assigned parcels in a buffered year window.
+
+suppressPackageStartupMessages({
+  library(data.table)
+  library(arrow)
+  library(jsonlite)
+  library(lubridate)
+})
+
+path_management <- Sys.getenv("CCMMF_MANAGEMENT", "/projectnb/dietzelab/ccmmf/management")
+matched_dir <- file.path(path_management, "phenology", "matched_landiq_mslsp_v4.1")
+pool_script <- file.path(path_management, "scripts", "traits", "pool_calculations_from_lookup.R")
+tillage_metrics_script <- file.path(path_management, "scripts", "tillage", "tillage_metrics.R")
+ndti_root <- file.path(path_management, "tillage", "ndti_v4.1")
+out_dir <- file.path(path_management, "event_files")
+
+args <- commandArgs(trailingOnly = TRUE)
+if (length(args) < 1L) {
+  stop("Usage: Rscript make_events_statewide.R <year> [phenology|planting|harvest|tillage]")
+}
+year_arg <- as.integer(args[1L])
+if (is.na(year_arg)) {
+  stop("Year must be an integer, got: ", args[1L])
+}
+event_type <- if (length(args) >= 2L) {
+  match.arg(args[2L], c("phenology", "planting", "harvest", "tillage"))
+} else {
+  NULL
+}
+
+run_phenology <- is.null(event_type) || event_type == "phenology"
+run_planting <- is.null(event_type) || event_type == "planting"
+run_harvest <- is.null(event_type) || event_type == "harvest"
+run_tillage <- !is.null(event_type) && event_type == "tillage"
+
+msg_suffix <- if (is.null(event_type)) {
+  " (phenology + planting + harvest)"
+} else {
+  paste0(" event_type=", event_type)
+}
+message("[make_events_statewide] year=", year_arg, msg_suffix)
+
+dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
+
+#### Single-year assigned (phenology, planting, harvest)
+if (run_phenology || run_planting || run_harvest) {
+  assigned_file <- file.path(matched_dir, sprintf("assigned_year=%d.parquet", year_arg))
+  if (!file.exists(assigned_file)) {
+    stop("Missing assigned file: ", assigned_file)
+  }
+  assigned <- as.data.table(read_parquet(assigned_file))
+  assigned[, parcel_id := as.character(parcel_id)]
+  matched <- assigned[assigned_by == "matched"]
+  message("[assigned] ", nrow(assigned), " rows; matched: ", nrow(matched))
+
+  n_before <- nrow(matched)
+  matched <- matched[
+    !is.na(landiq_CLASS) & !is.na(landiq_SUBCLASS) & !is.na(landiq_PFT)
+  ]
+  message("  Dropped ", n_before - nrow(matched), " matched rows missing crop/PFT; ", nrow(matched), " remain")
+
+  if (run_phenology || run_planting) {
+    n_ev <- nrow(matched)
+    matched <- matched[!is.na(mslsp_EVImax) & !is.na(mslsp_EVIamp)]
+    message("  EVI filter: dropped ", n_ev - nrow(matched), " rows; ", nrow(matched), " remain")
+  }
+
+  if (run_harvest) {
+    for (col in c("mslsp_OGMn", "mslsp_OGD")) {
+      if (!col %in% names(matched)) {
+        matched[, (col) := NA]
+      }
+    }
+    matched[, pft_l := tolower(trimws(as.character(landiq_PFT)))]
+    matched[, harvest_date_str := NA_character_]
+    matched[pft_l %in% c("row", "rice"), harvest_date_str := as.character(mslsp_OGMn)]
+    matched[pft_l %in% c("hay", "woody"), harvest_date_str := as.character(mslsp_OGD)]
+    n_hd <- nrow(matched)
+    matched <- matched[
+      !is.na(harvest_date_str) & nzchar(harvest_date_str) & harvest_date_str != "NA"
+    ]
+    message(
+      "  Harvest-date filter (annuals OGMn, perennials OGD; no Peak): dropped ",
+      n_hd - nrow(matched), " rows; ", nrow(matched), " remain"
+    )
+    matched[, pft_l := NULL]
+  }
+}
+
+if (run_planting || run_harvest) {
+  pool_env <- new.env(parent = globalenv())
+  source(pool_script, local = pool_env)
+  harvest_rds <- Sys.getenv("HARVEST_LOOKUP_RDS", "")
+  if (nzchar(harvest_rds)) {
+    lk <- pool_env$load_trait_lookup(harvest_path = harvest_rds)
+    message("[pool] Loaded trait lookup (harvest_path=", harvest_rds, ")")
+  } else {
+    lk <- pool_env$load_trait_lookup()
+    message("[pool] Loaded trait lookup (default planting + harvest RDS paths)")
+  }
+}
+
+harvest_destructive_default <- tolower(Sys.getenv("HARVEST_WOODY_DESTRUCTIVE", "0")) %in% c("1", "true", "yes")
+
+#### PHENOLOGY
+if (run_phenology) {
+  message("[phenology] Building events")
+  pheno <- matched[, .(
+    site_id = parcel_id,
+    year = lubridate::year(mslsp_Peak),
+    leafonday = as.character(mslsp_50PCGI),
+    leafoffday = as.character(mslsp_50PCGD)
+  )]
+  pheno <- pheno[!is.na(leafonday) & !is.na(leafoffday)]
+  setorder(pheno, site_id, year)
+
+  pheno_parquet <- file.path(out_dir, sprintf("phenology_statewide_%d.parquet", year_arg))
+  write_parquet(pheno, pheno_parquet)
+  message("  Wrote ", pheno_parquet, " (", nrow(pheno), " rows)")
+
+  pheno_json_path <- file.path(out_dir, sprintf("phenology_statewide_%d.json", year_arg))
+  pheno_list <- lapply(split(pheno, pheno$site_id), function(rows) {
+    lapply(seq_len(nrow(rows)), function(i) {
+      list(
+        event_type = "phenology",
+        year = rows$year[i],
+        leafonday = rows$leafonday[i],
+        leafoffday = rows$leafoffday[i]
+      )
+    })
+  })
+  write(toJSON(pheno_list, auto_unbox = TRUE, pretty = TRUE), pheno_json_path)
+  message("  Wrote ", pheno_json_path)
+}
+
+#### PLANTING
+if (run_planting) {
+  message("[planting] Building events (C/N pools via LAI)")
+  planting_date_str <- as.character(matched$mslsp_OGI)
+
+  planting_rows <- vector("list", nrow(matched))
+  for (i in seq_len(nrow(matched))) {
+    row <- matched[i]
+    p <- tryCatch(
+      pool_env$initialize_planting(
+        ID = row$parcel_id,
+        DATE = planting_date_str[i],
+        PFT = row$landiq_PFT,
+        lk = lk,
+        class = row$landiq_CLASS,
+        subclass = row$landiq_SUBCLASS,
+        mslsp_EVImax = row$mslsp_EVImax,
+        mslsp_EVIamp = row$mslsp_EVIamp
+      ),
+      error = function(e) NULL
+    )
+    if (!is.null(p) && nrow(p) > 0) {
+      code <- paste0(trimws(as.character(row$landiq_CLASS)), as.character(row$landiq_SUBCLASS))
+      planting_rows[[i]] <- data.table(
+        site_id = row$parcel_id,
+        year = row$year,
+        season = row$season,
+        date = planting_date_str[i],
+        code = code,
+        PFT = row$landiq_PFT,
+        LAI = as.numeric(p$LAI[1]),
+        C_LEAF = as.numeric(p$C_LEAF[1]),
+        C_STEM = as.numeric(p$C_STEM[1]),
+        C_FINEROOT = as.numeric(p$C_FINEROOT[1]),
+        C_COARSEROOT = as.numeric(p$C_COARSEROOT[1]),
+        N_LEAF = as.numeric(p$N_LEAF[1]),
+        N_STEM = as.numeric(p$N_STEM[1]),
+        N_FINEROOT = as.numeric(p$N_FINEROOT[1]),
+        N_COARSEROOT = as.numeric(p$N_COARSEROOT[1])
+      )
+    }
+    if (i %% 10000L == 0L) {
+      message("  ", i, "/", nrow(matched), " done")
+    }
+  }
+  planting_dt <- rbindlist(planting_rows, use.names = TRUE, fill = TRUE)
+  setorder(planting_dt, site_id, year, season)
+  planting_dt[, event_type := "planting"]
+  setcolorder(planting_dt, c("event_type", setdiff(names(planting_dt), "event_type")))
+
+  plant_parquet <- file.path(out_dir, sprintf("planting_statewide_%d.parquet", year_arg))
+  write_parquet(planting_dt, plant_parquet)
+  message("  Wrote ", plant_parquet, " (", nrow(planting_dt), " rows)")
+
+  plant_json_path <- file.path(out_dir, sprintf("planting_statewide_%d.json", year_arg))
+  plant_list <- lapply(split(planting_dt, planting_dt$site_id), function(rows) {
+    lapply(seq_len(nrow(rows)), function(i) {
+      list(
+        event_type = rows$event_type[i],
+        date = rows$date[i],
+        year = rows$year[i],
+        season = rows$season[i],
+        crop = rows$code[i],
+        PFT = rows$PFT[i],
+        LAI = rows$LAI[i],
+        leaf_c_kg_m2 = rows$C_LEAF[i],
+        stem_c_kg_m2 = rows$C_STEM[i],
+        fineroot_c_kg_m2 = rows$C_FINEROOT[i],
+        coarseroot_c_kg_m2 = rows$C_COARSEROOT[i],
+        leaf_n_kg_m2 = rows$N_LEAF[i],
+        stem_n_kg_m2 = rows$N_STEM[i],
+        fineroot_n_kg_m2 = rows$N_FINEROOT[i],
+        coarseroot_n_kg_m2 = rows$N_COARSEROOT[i]
+      )
+    })
+  })
+  write(toJSON(plant_list, auto_unbox = TRUE, pretty = TRUE), plant_json_path)
+  message("  Wrote ", plant_json_path)
+}
+
+#### HARVEST (removal fractions for SIPNET / PEcAn; same LandIQ row as planting)
+if (run_harvest) {
+  message("[harvest] Building events (lookup-based removal fractions)")
+  has_dest_col <- "destructive" %in% names(matched)
+
+  harvest_rows <- vector("list", nrow(matched))
+  for (i in seq_len(nrow(matched))) {
+    row <- matched[i]
+    code <- paste0(trimws(as.character(row$landiq_CLASS)), as.character(row$landiq_SUBCLASS))
+    dest <- if (has_dest_col) isTRUE(as.logical(row$destructive[1])) else harvest_destructive_default
+    h <- tryCatch(
+      pool_env$initialize_harvest_from_lookup(
+        ID = row$parcel_id,
+        DATE = as.character(row$harvest_date_str)[1],
+        code = code,
+        PFT = row$landiq_PFT,
+        lk = lk,
+        destructive = dest
+      ),
+      error = function(e) NULL
+    )
+    if (!is.null(h) && nrow(h) > 0) {
+      harvest_rows[[i]] <- data.table(
+        site_id = row$parcel_id,
+        year = row$year,
+        season = row$season,
+        date = as.character(row$harvest_date_str)[1],
+        CLASS_SUBCLASS = code,
+        PFT = row$landiq_PFT,
+        frac_above_removed_0to1 = as.numeric(h$AGB_REMOVED[1]),
+        frac_above_to_litter_0to1 = as.numeric(h$AGB_LITTER[1]),
+        frac_below_removed_0to1 = as.numeric(h$BGB_REMOVED[1]),
+        frac_below_to_litter_0to1 = as.numeric(h$BGB_LITTER[1])
+      )
+    }
+    if (i %% 10000L == 0L) {
+      message("  ", i, "/", nrow(matched), " done")
+    }
+  }
+  harvest_dt <- rbindlist(harvest_rows, use.names = TRUE, fill = TRUE)
+  setorder(harvest_dt, site_id, year, season)
+  harvest_dt[, event_type := "harvest"]
+  setcolorder(harvest_dt, c("event_type", setdiff(names(harvest_dt), "event_type")))
+
+  harvest_parquet <- file.path(out_dir, sprintf("harvest_statewide_%d.parquet", year_arg))
+  write_parquet(harvest_dt, harvest_parquet)
+  message("  Wrote ", harvest_parquet, " (", nrow(harvest_dt), " rows)")
+
+  harvest_json_path <- file.path(out_dir, sprintf("harvest_statewide_%d.json", year_arg))
+  harvest_list <- lapply(split(harvest_dt, harvest_dt$site_id), function(rows) {
+    lapply(seq_len(nrow(rows)), function(i) {
+      list(
+        event_type = rows$event_type[i],
+        date = rows$date[i],
+        year = rows$year[i],
+        season = rows$season[i],
+        crop = rows$CLASS_SUBCLASS[i],
+        PFT = rows$PFT[i],
+        frac_above_removed_0to1 = rows$frac_above_removed_0to1[i],
+        frac_above_to_litter_0to1 = rows$frac_above_to_litter_0to1[i],
+        frac_below_removed_0to1 = rows$frac_below_removed_0to1[i],
+        frac_below_to_litter_0to1 = rows$frac_below_to_litter_0to1[i]
+      )
+    })
+  })
+  write(toJSON(harvest_list, auto_unbox = TRUE, pretty = TRUE), harvest_json_path)
+  message("  Wrote ", harvest_json_path)
+}
+
+#### TILLAGE (buffered multi-year assigned + NDTI; parcel chunks)
+if (run_tillage) {
+  suppressPackageStartupMessages(library(dplyr))
+  if (!file.exists(tillage_metrics_script)) {
+    stop("Missing tillage_metrics.R: ", tillage_metrics_script)
+  }
+  source(tillage_metrics_script)
+
+  buf <- suppressWarnings(as.integer(Sys.getenv("TILLAGE_BUFFER_YEARS", "1")))
+  if (is.na(buf) || buf < 0L) {
+    buf <- 1L
+  }
+  chunk_n <- suppressWarnings(as.integer(Sys.getenv("TILLAGE_PARCEL_CHUNK", "3000")))
+  if (is.na(chunk_n) || chunk_n < 1L) {
+    chunk_n <- 3000L
+  }
+
+  year_first <- year_arg
+  year_last <- year_arg
+  load_years <- seq(year_first - buf, year_last + buf)
+
+  message(
+    "[tillage] output year ", year_arg, " | load years ", min(load_years), ":", max(load_years),
+    " | chunk ", chunk_n
+  )
+
+  list_ndti_parquet <- function(yrs) {
+    fl <- character(0)
+    for (y in yrs) {
+      ydir <- file.path(ndti_root, sprintf("year=%d", y))
+      if (!dir.exists(ydir)) {
+        next
+      }
+      fl <- c(
+        fl,
+        Sys.glob(file.path(ydir, sprintf("ndti_year=%d_month=*.parquet", y))),
+        Sys.glob(file.path(ydir, "*.parquet"))
+      )
+    }
+    unique(fl[file.exists(fl)])
+  }
+  ndti_files <- list_ndti_parquet(load_years)
+  if (length(ndti_files) == 0L) {
+    stop("[tillage] No NDTI parquet under ", ndti_root)
+  }
+  message("[tillage] NDTI files found: ", length(ndti_files))
+
+  mslsp_parts <- list()
+  for (y in load_years) {
+    f <- file.path(matched_dir, sprintf("assigned_year=%d.parquet", y))
+    if (!file.exists(f)) {
+      message("[tillage] skip missing ", f)
+      next
+    }
+    mslsp_parts[[length(mslsp_parts) + 1L]] <- as.data.table(read_parquet(f))
+  }
+  if (length(mslsp_parts) == 0L) {
+    stop("[tillage] No assigned parquet for load years")
+  }
+
+  mslsp_all <- rbindlist(mslsp_parts, use.names = TRUE, fill = TRUE)
+  mslsp_all[, parcel_id := as.character(parcel_id)]
+  mslsp_all <- mslsp_all[assigned_by == "matched"]
+  mslsp_all <- mslsp_all[!is.na(mslsp_OGI) & !is.na(mslsp_OGMn)]
+  mslsp_all[, OGI_date := as.Date(mslsp_OGI)]
+  mslsp_all[, OGMn_date := as.Date(mslsp_OGMn)]
+  mslsp_all <- mslsp_all[!is.na(OGI_date) & !is.na(OGMn_date)]
+
+  phenology_full <- mslsp_all[, .(parcel_id, year, OGI_date, OGMn_date)]
+  pft_y <- mslsp_all[, .(PFT = landiq_PFT[1L]), by = .(parcel_id, year)]
+
+  message("[tillage] phenology rows ", nrow(phenology_full), " | parcels ", uniqueN(phenology_full$parcel_id))
+
+  read_ndti_for_parcels <- function(parcel_ids, yrs, root) {
+    pid_unique <- unique(as.character(parcel_ids))
+    parts <- list()
+    for (y in yrs) {
+      ydir <- file.path(root, sprintf("year=%d", y))
+      if (!dir.exists(ydir)) {
+        next
+      }
+      # open_dataset(ydir) fails when year=* contains non-parquet (e.g. logs/*.log)
+      fl <- c(
+        Sys.glob(file.path(ydir, sprintf("ndti_year=%d_month=*.parquet", y))),
+        Sys.glob(file.path(ydir, "*.parquet"))
+      )
+      fl <- unique(fl[file.exists(fl)])
+      if (length(fl) == 0L) {
+        next
+      }
+      ds <- tryCatch(arrow::open_dataset(fl), error = function(e) NULL)
+      if (is.null(ds)) {
+        next
+      }
+      sub <- tryCatch(
+        ds |>
+          dplyr::filter(parcel_id %in% pid_unique) |>
+          dplyr::collect(),
+        error = function(e) NULL
+      )
+      if (!is.null(sub) && nrow(sub) > 0L) {
+        parts[[length(parts) + 1L]] <- as.data.table(sub)
+      }
+    }
+    if (length(parts) == 0L) {
+      return(data.table())
+    }
+    rbindlist(parts, use.names = TRUE, fill = TRUE)
+  }
+
+  all_pids <- unique(phenology_full$parcel_id)
+  n_chunk <- ceiling(length(all_pids) / chunk_n)
+  results <- vector("list", n_chunk)
+
+  for (ic in seq_len(n_chunk)) {
+    i0 <- (ic - 1L) * chunk_n + 1L
+    i1 <- min(ic * chunk_n, length(all_pids))
+    pchunk <- all_pids[i0:i1]
+    message("[tillage] chunk ", ic, "/", n_chunk, " parcels ", i0, ":", i1)
+
+    pheno_chunk <- phenology_full[parcel_id %in% pchunk]
+    if (nrow(pheno_chunk) == 0L) {
+      next
+    }
+
+    ndti_chunk <- read_ndti_for_parcels(pchunk, load_years, ndti_root)
+    if (nrow(ndti_chunk) == 0L) {
+      message("  no NDTI rows")
+      next
+    }
+    ndti_chunk[, date := as.Date(date)]
+    ndti_chunk <- merge(ndti_chunk, pft_y, by = c("parcel_id", "year"), all.x = TRUE)
+    ndti_chunk <- ndti_chunk[!is.na(PFT) & nzchar(as.character(PFT))]
+
+    common <- intersect(unique(ndti_chunk$parcel_id), unique(pheno_chunk$parcel_id))
+    if (length(common) == 0L) {
+      message("  no ndti/phenology overlap")
+      next
+    }
+    ndti_chunk <- ndti_chunk[parcel_id %in% common]
+    pheno_chunk <- pheno_chunk[parcel_id %in% common]
+
+    res <- tryCatch(
+      tillage_metrics(ndti_table = ndti_chunk, phenology_table = pheno_chunk),
+      error = function(e) {
+        warning("[tillage] tillage_metrics failed chunk ", ic, ": ", conditionMessage(e))
+        NULL
+      }
+    )
+    if (!is.null(res) && nrow(res) > 0L) {
+      results[[ic]] <- as.data.table(res)
+    }
+  }
+
+  all_res <- rbindlist(results[!vapply(results, is.null, NA)], use.names = TRUE, fill = TRUE)
+  if (nrow(all_res) == 0L) {
+    stop("[tillage] No results (check NDTI overlap and errors above)")
+  }
+
+  all_res[, parcel_id := as.character(parcel_id)]
+  all_res[, year := as.integer(year)]
+
+  for (y in year_first:year_last) {
+    yr_dt <- all_res[year == y]
+    pq <- file.path(out_dir, sprintf("tillage_statewide_%d.parquet", y))
+    js <- file.path(out_dir, sprintf("tillage_statewide_%d.json", y))
+
+    if (nrow(yr_dt) == 0L) {
+      message("[tillage] year ", y, ": no rows; writing empty outputs")
+      write_parquet(
+        data.table(event_type = character(), parcel_id = character(), year = integer()),
+        pq
+      )
+      writeLines("{}", js)
+      next
+    }
+
+    n_pre <- nrow(yr_dt)
+    ord_cols <- intersect(c("parcel_id", "OGMn_date", "min_date", "max_date"), names(yr_dt))
+    if (length(ord_cols) > 0L) {
+      data.table::setorderv(yr_dt, ord_cols)
+    }
+    yr_dt <- unique(yr_dt, by = c("parcel_id", "OGMn_date"))
+    if (nrow(yr_dt) < n_pre) {
+      message(
+        "[tillage] year ", y, ": deduped ", n_pre - nrow(yr_dt),
+        " duplicate row(s) (parcel_id + OGMn_date); kept first after sort"
+      )
+    }
+
+    out_dt <- copy(yr_dt)
+    out_dt[, site_id := parcel_id]
+    out_dt[, event_type := "tillage"]
+    date_cols <- names(out_dt)[vapply(out_dt, function(z) inherits(z, "Date"), NA)]
+    for (cn in date_cols) {
+      out_dt[, (cn) := as.character(get(cn))]
+    }
+    setcolorder(out_dt, c("event_type", setdiff(names(out_dt), "event_type")))
+    write_parquet(out_dt, pq)
+    message("  Wrote ", pq, " (", nrow(out_dt), " rows)")
+
+    json_list <- lapply(split(out_dt, out_dt$site_id), function(rows) {
+      lapply(seq_len(nrow(rows)), function(i) {
+        list(
+          event_type = rows$event_type[i],
+          year = rows$year[i],
+          PFT = rows$PFT[i],
+          OGMn_date = rows$OGMn_date[i],
+          max_date = rows$max_date[i],
+          max_ndti = rows$max_ndti[i],
+          min_date = rows$min_date[i],
+          min_ndti = rows$min_ndti[i],
+          min_n_valid = rows$min_n_valid[i],
+          min_sd = rows$min_sd[i],
+          ndti_pct_change = rows$ndti_pct_change[i],
+          min_val_date_before = rows$min_val_date_before[i],
+          min_val_n_before = rows$min_val_n_before[i],
+          min_val_date_after = rows$min_val_date_after[i],
+          min_val_n_after = rows$min_val_n_after[i]
+        )
+      })
+    })
+    write(toJSON(json_list, auto_unbox = TRUE, pretty = TRUE), js)
+    message("  Wrote ", js)
+  }
+}
+
+message("[make_events_statewide] Done for year=", year_arg)
diff --git a/modules/data.remote/inst/ccmmf/hls/build_hls_parcel_tile_map.R b/modules/data.remote/inst/ccmmf/hls/build_hls_parcel_tile_map.R
new file mode 100644
index 00000000000..04ec1a0d1f4
--- /dev/null
+++ b/modules/data.remote/inst/ccmmf/hls/build_hls_parcel_tile_map.R
@@ -0,0 +1,188 @@
+# Build a parcel-year to HLS tile map for California agricultural parcels (LandIQ v4.1).
+# Reads crops_all_years.parquet filtered by agricultural CLASS/SUBCLASS, loads parcel
+# geometries, intersects with the HLS tile grid, and writes RDS plus tile parcel counts.
+#
+# Main inputs: CCMMF_LANDIQ_V4 (LandIQ root), CCMMF_MANAGEMENT (crop lookup, tile extent),
+#   optional CLI year_min year_max overwrite.
+# Main outputs: hls_parcel_tile_map_v4.1_years=MIN-MAX.rds, hls_tile_parcel_counts CSV,
+#   optional removed parcel-years CSV when geometries fail QC.
+# How to run: Rscript scripts/hls/build_hls_parcel_tile_map.R [year_min] [year_max] [overwrite]
+# Workflow: upstream of tilewise MSLSP/NDTI drivers; run build_hls_tile_extent.R first.
+
+suppressPackageStartupMessages({
+  library(sf)
+  library(data.table)
+  library(arrow)
+  library(dplyr)
+})
+sf::sf_use_s2(FALSE)
+
+#### Configuration
+
+path_landiq_v4 <- Sys.getenv("CCMMF_LANDIQ_V4", "/projectnb/dietzelab/ccmmf/LandIQ-harmonized-v4.1")
+path_management <- Sys.getenv("CCMMF_MANAGEMENT", "/projectnb/dietzelab/ccmmf/management")
+path_parcels <- file.path(path_landiq_v4, "parcels-consolidated.gpkg")
+path_crops_parq <- file.path(path_landiq_v4, "crops_all_years.parq")
+path_cropcode_lookup <- file.path(path_management, "LandIQ_cropCode_lookup_table.csv")
+path_tiles <- file.path(path_management, "hls_tile_extent.rds")
+path_out <- path_management
+
+#### Parse arguments
+
+args <- commandArgs(trailingOnly = TRUE)
+year_min <- if (length(args) >= 1) as.integer(args[1]) else 2016L
+year_max <- if (length(args) >= 2) as.integer(args[2]) else 2024L
+overwrite <- length(args) >= 3 && tolower(args[3]) %in% c("overwrite", "true", "t", "1", "yes", "y")
+
+out_file <- file.path(path_out, sprintf("hls_parcel_tile_map_v4.1_years=%d-%d.rds", year_min, year_max))
+if (file.exists(out_file) && !overwrite) quit(save = "no", status = 0)
+if (!file.exists(path_tiles)) {
+  stop("Tile extent not found. Run: Rscript scripts/hls/build_hls_tile_extent.R")
+}
+
+tile_prep <- readRDS(path_tiles)
+tile_extent <- tile_prep$tile_extent_sf
+used_crs <- tile_prep$used_crs
+
+#### Parcel-year rows (agricultural only via CLASS and SUBCLASS join)
+
+# Join on CLASS+SUBCLASS so subclass-level PFT differences (e.g. T19 vs T28 woody) stay correct.
+lookup <- fread(path_cropcode_lookup)
+ag_pairs <- unique(lookup[is_agricultural == TRUE,
+  .(CLASS = trimws(CLASS), SUBCLASS = as.character(SUBCLASS))])
+ag_classes_filter <- unique(ag_pairs$CLASS)
+
+parcel_year_raw <- arrow::open_dataset(path_crops_parq) |>
+  dplyr::filter(year >= year_min, year <= year_max, CLASS %in% ag_classes_filter) |>
+  dplyr::select(parcel_id, year, CLASS, SUBCLASS) |>
+  dplyr::collect() |>
+  as.data.table()
+parcel_year_raw[, CLASS := trimws(as.character(CLASS))]
+parcel_year_raw[, SUBCLASS := as.character(SUBCLASS)]
+parcel_year <- merge(parcel_year_raw, ag_pairs, by = c("CLASS", "SUBCLASS"))[
+  , .(parcel_id = as.character(parcel_id), year = as.integer(year))
+] |> unique()
+parcel_year[, parcel_id := as.character(parcel_id)]
+parcel_year[, year := as.integer(year)]
+message("Parcel-year rows (agricultural, ", year_min, "-", year_max, "): ", nrow(parcel_year))
+
+#### Load parcel geometry in chunks (avoid huge SQL IN lists)
+
+ids <- unique(parcel_year$parcel_id)
+layer <- st_layers(path_parcels)$name[1]
+chunks <- split(ids, ceiling(seq_along(ids) / 5000L))
+geom_chunks <- lapply(chunks, function(x) {
+  esc <- gsub("'", "''", x, fixed = TRUE)
+  q <- sprintf('SELECT * FROM "%s" WHERE parcel_id IN (%s)', layer, paste0("'", esc, "'", collapse = ","))
+  st_read(path_parcels, query = q, quiet = TRUE)
+})
+parcels <- do.call(rbind, geom_chunks)
+parcels$parcel_id <- as.character(parcels$parcel_id)
+
+#### QC: drop invalid or empty geometries (bad WKB can break OGR)
+
+valid <- tryCatch(
+  !sf::st_is_empty(sf::st_geometry(parcels)),
+  error = function(e) {
+    message("Bulk geometry check failed; checking row-by-row for corrupt geometries.")
+    vapply(seq_len(nrow(parcels)), function(i) {
+      tryCatch(!sf::st_is_empty(sf::st_geometry(parcels)[i]), error = function(e) FALSE)
+    }, logical(1))
+  }
+)
+removed_log <- if (any(!valid)) {
+  parcel_year[parcel_id %in% parcels$parcel_id[!valid], .(parcel_id, year)]
+} else {
+  data.table(parcel_id = character(), year = integer())
+}
+parcels <- parcels[valid, ]
+
+#### Reproject to tile CRS (row-by-row fallback if bulk transform fails)
+
+parcels_tr <- tryCatch(sf::st_transform(parcels, used_crs), error = function(e) NULL)
+if (is.null(parcels_tr)) {
+  message("Bulk st_transform failed; checking row-by-row.")
+  chunk_size <- 5000L
+  n <- nrow(parcels)
+  good <- logical(n)
+  for (start in seq(1L, n, by = chunk_size)) {
+    end <- min(start + chunk_size - 1L, n)
+    chk <- tryCatch(sf::st_transform(parcels[start:end, ], used_crs), error = function(e) NULL)
+    if (!is.null(chk)) {
+      good[start:end] <- TRUE
+    } else {
+      for (i in start:end) {
+        good[i] <- tryCatch({
+          sf::st_transform(parcels[i, ], used_crs)
+          TRUE
+        }, error = function(e) FALSE)
+      }
+    }
+  }
+  drop_ids <- parcels$parcel_id[!good]
+  if (length(drop_ids) > 0) {
+    removed_log <- rbind(removed_log, parcel_year[parcel_id %in% drop_ids, .(parcel_id, year)])
+  }
+  parcels <- parcels[good, ]
+  parcels <- sf::st_transform(parcels, used_crs)
+} else {
+  parcels <- parcels_tr
+}
+
+#### Spatial join: parcel polygon intersects tile polygon (any overlap counts)
+
+hits <- tryCatch(sf::st_intersects(parcels, tile_extent), error = function(e) NULL)
+if (is.null(hits)) {
+  message("Bulk st_intersects failed; checking row-by-row.")
+  n <- nrow(parcels)
+  good <- logical(n)
+  for (i in seq_len(n)) {
+    good[i] <- tryCatch({
+      hi <- sf::st_intersects(parcels[i, ], tile_extent)
+      length(hi[[1]]) >= 0
+      TRUE
+    }, error = function(e) FALSE)
+  }
+  drop_ids <- parcels$parcel_id[!good]
+  if (length(drop_ids) > 0) {
+    removed_log <- rbind(removed_log, parcel_year[parcel_id %in% drop_ids, .(parcel_id, year)])
+  }
+  parcels <- parcels[good, ]
+  hits <- sf::st_intersects(parcels, tile_extent)
+}
+keep <- lengths(hits) > 0
+parcels <- parcels[keep, ]
+hits <- hits[keep]
+
+if (nrow(removed_log) > 0) {
+  removed_log <- unique(removed_log)
+  removed_log_file <- file.path(path_out, sprintf("hls_parcel_tile_map_removed_v4.1_years=%d-%d.csv", year_min, year_max))
+  dir.create(path_out, recursive = TRUE, showWarnings = FALSE)
+  fwrite(removed_log, removed_log_file)
+  message("Dropped ", nrow(removed_log), " parcel-years with invalid geometry; log: ", removed_log_file)
+}
+
+#### Build parcel to tiles table and join to parcel_year
+
+tile_by_parcel <- data.table(
+  parcel_id = parcels$parcel_id,
+  tileIDs = vapply(hits, function(i) paste(tile_extent$tile_id[i], collapse = ","), character(1)),
+  n_tiles = lengths(hits)
+)
+setkey(tile_by_parcel, parcel_id)
+setkey(parcel_year, parcel_id)
+out <- tile_by_parcel[parcel_year, nomatch = 0][, .(parcel_id, year, tileIDs, n_tiles)]
+
+#### Tile to parcel counts (for scheduling)
+
+tile_long <- out[, .(tile_id = unlist(strsplit(tileIDs, ",", fixed = TRUE))), by = .(parcel_id, year)]
+tile_counts <- tile_long[, .(n_parcels = .N), by = .(tile_id, year)]
+setorder(tile_counts, tile_id, year)
+
+#### Write outputs
+
+dir.create(path_out, recursive = TRUE, showWarnings = FALSE)
+saveRDS(out, out_file)
+tile_counts_file <- file.path(path_out, sprintf("hls_tile_parcel_counts_v4.1_years=%d-%d.csv", year_min, year_max))
+fwrite(tile_counts, tile_counts_file)
+message("Wrote tile->parcel counts: ", tile_counts_file)
diff --git a/modules/data.remote/inst/ccmmf/hls/combine_mslsp_tilepieces.R b/modules/data.remote/inst/ccmmf/hls/combine_mslsp_tilepieces.R
new file mode 100644
index 00000000000..90ab1fb9b3b
--- /dev/null
+++ b/modules/data.remote/inst/ccmmf/hls/combine_mslsp_tilepieces.R
@@ -0,0 +1,69 @@
+#!/usr/bin/env Rscript
+# Combine per-tile MSLSP tilepieces into one annual Parquet for a year.
+# Aggregates duplicate parcels at tile boundaries with weighted means and QA modes.
+#
+# Main inputs: prep from mslsp_prep_static_tilewise(year); tilepieces under tilepieces_year=.
+# Main outputs: mslsp_year=Y.parquet in the year output directory.
+# How to run: Rscript combine_mslsp_tilepieces.R <year> [overwrite], or via driver combine.
+# Workflow: monitoring workflow merge step for MSLSP.
+
+script_dir <- if (length(file_arg <- commandArgs(trailingOnly = FALSE)[grepl("^--file=",
+                   commandArgs(trailingOnly = FALSE))])) {
+  dirname(sub("^--file=", "", file_arg[1]))
+} else "."
+source(file.path(script_dir, "extract_summary_core.R"))
+source(file.path(script_dir, "tilewise_mslsp_implementation.R"))
+
+#### Combine function
+# Read all tilepieces for a year, aggregate cross-tile duplicates, write Parquet.
+# Weighted aggregation across tiles (when a parcel spans multiple tiles):
+#   metric mean: SUM(w_valid * metric_mean) / SUM(w_valid)
+#   metric sd:   derived from parallel variance identity
+#   QA mode:     weighted mode (tile with greatest coverage area wins)
+#   na_frac:     weighted mean of per-tile na_frac values
+mslsp_combine <- function(prep, time_key, overwrite = FALSE, verbose = TRUE) {
+  year <- prep$year
+
+  tilepieces_dir <- file.path(prep$out_dir, sprintf("tilepieces_year=%d", year))
+  out_path       <- file.path(prep$out_dir, sprintf("mslsp_year=%d.parquet", year))
+
+  if (file.exists(out_path) && !overwrite) {
+    if (verbose) message("[combine] skip (exists): ", out_path)
+    return(invisible(out_path))
+  }
+
+  tile_files <- list.files(tilepieces_dir, "^tile=.*\\.csv\\.gz$", full.names = TRUE)
+  if (length(tile_files) == 0) stop("No tilepieces found in: ", tilepieces_dir)
+  if (verbose) message("[combine] ", length(tile_files), " tilepieces -> ", out_path)
+
+  dt <- rbindlist(lapply(tile_files, function(f) {
+    tryCatch(fread(f, showProgress = FALSE), error = function(e) NULL)
+  }), fill = TRUE, use.names = TRUE)
+
+  if (nrow(dt) == 0) {
+    if (verbose) message("[combine] no rows - writing empty parquet")
+    arrow::write_parquet(dt, out_path)
+    return(invisible(out_path))
+  }
+
+  agg <- mslsp_aggregate_tilepieces(dt, year)
+  arrow::write_parquet(agg, out_path)
+  if (verbose) message("[combine] wrote ", nrow(agg), " rows")
+  invisible(out_path)
+}
+
+#### CLI
+
+if (sys.nframe() == 0) {
+  args           <- commandArgs(trailingOnly = TRUE)
+  is_ow          <- function(x) tolower(x) %in% c("true", "t", "yes", "y", "overwrite")
+  overwrite_flag <- any(sapply(args, is_ow))
+
+  if (length(args) < 1) stop(
+    "Usage: Rscript combine_mslsp_tilepieces.R <year> [overwrite]"
+  )
+
+  year_arg <- as.integer(args[1])
+  prep     <- mslsp_prep_static_tilewise(year_arg)
+  mslsp_combine(prep, 1L, overwrite = overwrite_flag)
+}
diff --git a/modules/data.remote/inst/ccmmf/hls/combine_ndti_tilepieces.R b/modules/data.remote/inst/ccmmf/hls/combine_ndti_tilepieces.R
new file mode 100644
index 00000000000..93a23753303
--- /dev/null
+++ b/modules/data.remote/inst/ccmmf/hls/combine_ndti_tilepieces.R
@@ -0,0 +1,118 @@
+#!/usr/bin/env Rscript
+# Combine per-tile NDTI tilepieces into one monthly Parquet for a year and month.
+# Aggregates duplicate (parcel_id, date) rows with weighted mean and variance rules.
+#
+# Main inputs: prep from ndti_prep_static_tilewise(year); tilepieces for that month.
+# Main outputs: ndti_year=Y_month=MM.parquet under the year directory.
+# How to run: Rscript combine_ndti_tilepieces.R <year> <month> [overwrite], or via driver.
+# Workflow: monitoring workflow merge step for NDTI.
+
+script_dir <- if (length(file_arg <- commandArgs(trailingOnly = FALSE)[grepl("^--file=", commandArgs(trailingOnly = FALSE))])) {
+  dirname(sub("^--file=", "", file_arg[1]))
+} else "."
+source(file.path(script_dir, "tilewise_ndti_implementation.R"))
+
+#### Combine function
+# Read all tilepieces for one month, aggregate cross-tile duplicates, write parquet.
+# Weighted aggregation across tiles:
+#   mean:    sum(w * x) / sum(w)
+#   sd:      via parallel variance identity: Var = E[X^2] - E[X]^2
+#   na_frac: recover w_total = w_valid / (1 - na_frac) per tile row, then
+#            re-derive na_frac = 1 - sum(w_valid) / sum(w_total) across tiles
+ndti_combine <- function(prep, month, overwrite = FALSE, verbose = TRUE) {
+  year  <- prep$year
+  month <- as.integer(month)
+
+  out_path <- path_monthly_output(prep$out_dir, year, month)
+  if (file.exists(out_path) && !overwrite) {
+    if (verbose) message("[combine] skip (exists): ", out_path)
+    return(invisible(out_path))
+  }
+
+  tilepieces_dir <- path_tilepieces(prep$out_dir, year, month)
+  tile_files     <- list.files(tilepieces_dir, "^tile=.*\\.csv\\.gz$", full.names = TRUE)
+  if (length(tile_files) == 0) stop("No tilepieces found in: ", tilepieces_dir)
+  if (verbose) message("[combine] ", length(tile_files), " tilepieces -> ", out_path)
+
+  col_types <- list(
+    character = "parcel_id",
+    integer   = "n_valid",
+    double    = c("ndti_mean", "ndti_sd", "w_valid", "sum_w2", "na_frac")
+    # date and parcel_id read as character; date is cast after rbindlist
+  )
+
+  # Read all tiles; fread handles .gz natively
+  all_tiles <- rbindlist(
+    lapply(tile_files, function(f) {
+      dt <- tryCatch(fread(f, colClasses = col_types), error = function(e) NULL)
+      if (is.null(dt) || nrow(dt) == 0) return(NULL)
+      dt
+    }),
+    fill = TRUE
+  )
+
+  all_tiles <- all_tiles[!is.na(ndti_mean)]
+  all_tiles[, date := as.Date(date)]
+
+  if (nrow(all_tiles) == 0) {
+    if (verbose) message("[combine] no non-null rows - writing empty parquet")
+    result <- data.table(
+      parcel_id = character(), year = integer(),
+      date      = as.Date(integer(0), origin = "1970-01-01"),
+      ndti_mean = double(), ndti_sd = double(),
+      n_valid   = integer(), w_valid = double(),
+      sum_w2    = double(),  na_frac = double()
+    )
+    arrow::write_parquet(result, out_path)
+    return(invisible(out_path))
+  }
+
+  # Recover per-row w_total for na_frac re-derivation across tiles
+  all_tiles[, w_total := fifelse(!is.na(na_frac) & na_frac < 1,
+                                 w_valid / (1 - na_frac), 0)]
+
+  result <- all_tiles[, {
+    sw  <- sum(w_valid)
+    mu  <- sum(w_valid * ndti_mean) / sw
+    wt  <- sum(w_total)
+    .(
+      ndti_mean = mu,
+      # parallel variance: E[X^2] - E[X]^2
+      ndti_sd   = sqrt(pmax(0,
+        sum(w_valid * (ndti_sd^2 + ndti_mean^2)) / sw - mu^2
+      )),
+      n_valid   = sum(as.integer(n_valid)),
+      w_valid   = sw,
+      sum_w2    = sum(sum_w2),
+      na_frac   = fifelse(wt > 0, 1 - sw / wt, NA_real_)
+    )
+  }, by = .(parcel_id, date)]
+
+  setorder(result, parcel_id, date)
+  result[, year := year]
+  setcolorder(result, c("parcel_id", "year", "date",
+                        "ndti_mean", "ndti_sd", "n_valid", "w_valid", "sum_w2", "na_frac"))
+
+  arrow::write_parquet(result, out_path)
+  if (verbose) message("[combine] wrote ", nrow(result), " rows")
+  invisible(out_path)
+}
+
+#### CLI
+
+if (sys.nframe() == 0) {
+  args           <- commandArgs(trailingOnly = TRUE)
+  is_ow          <- function(x) tolower(x) %in% c("true", "t", "yes", "y", "overwrite")
+  overwrite_flag <- any(sapply(args, is_ow))
+
+  if (length(args) < 2) stop(
+    "Usage: Rscript combine_ndti_tilepieces.R <year> <month 1-12> [overwrite]"
+  )
+
+  year_arg  <- as.integer(args[1])
+  month_arg <- suppressWarnings(as.integer(args[2]))
+  if (is.na(month_arg) || month_arg < 1L || month_arg > 12L) stop("Month must be 1-12")
+
+  prep <- ndti_prep_static_tilewise(year_arg)
+  ndti_combine(prep, month_arg, overwrite = overwrite_flag)
+}
diff --git a/modules/data.remote/inst/ccmmf/hls/extract_summary_core.R b/modules/data.remote/inst/ccmmf/hls/extract_summary_core.R
new file mode 100644
index 00000000000..4be6294ae6a
--- /dev/null
+++ b/modules/data.remote/inst/ccmmf/hls/extract_summary_core.R
@@ -0,0 +1,95 @@
+# Weighted summary helpers for HLS polygon extraction (MSLSP-style products).
+# Computes weighted mean, sd, QA mode, coverage weights, and na_frac for parcel summaries.
+#
+# Main inputs: data.table from raster extraction with ID, weight, layer columns.
+# Main outputs: summarized data.table rows per parcel_id (or id_col).
+# How to run: sourced by tilewise_mslsp_implementation.R; not a standalone CLI.
+# Workflow: shared by MSLSP tile extraction and combine steps.
+
+suppressPackageStartupMessages(library(data.table))
+
+#### Weighted statistics
+# Returns mean, sd, n_valid, w_valid, sum_w2, na_frac for SE and quality flags.
+weighted_stats <- function(x, w) {
+  ok <- !is.na(x) & !is.na(w) & w > 0
+  if (!any(ok)) {
+    return(list(mean = NA_real_, sd = NA_real_, n_valid = 0L,
+                w_valid = 0, sum_w2 = 0, na_frac = NA_real_))
+  }
+  x2 <- as.numeric(x[ok])
+  w2 <- as.numeric(w[ok])
+  wsum <- sum(w2)
+  mu <- sum(w2 * x2) / wsum
+  sd <- sqrt(sum(w2 * (x2 - mu)^2) / wsum)
+
+  w_all <- w[!is.na(w) & w > 0]
+  w_na  <- w[is.na(x) & !is.na(w) & w > 0]
+  na_frac <- if (length(w_all) == 0) NA_real_ else sum(w_na, na.rm = TRUE) / sum(w_all, na.rm = TRUE)
+
+  list(
+    mean    = as.numeric(mu),
+    sd      = as.numeric(sd),
+    n_valid = as.integer(length(x2)),
+    w_valid = as.numeric(wsum),
+    sum_w2  = as.numeric(sum(w2^2)),
+    na_frac = as.numeric(na_frac)
+  )
+}
+
+weighted_mode_stats <- function(x, w) {
+  ok <- !is.na(x) & !is.na(w) & w > 0
+  if (!any(ok)) return(list(mode = NA_real_, mode_frac = NA_real_))
+  dt <- data.table(x = x[ok], w = w[ok])
+  agg <- dt[, .(w_sum = sum(w)), by = x][order(-w_sum)]
+  mode_val <- agg$x[1]
+  mode_frac <- agg$w_sum[1] / sum(agg$w_sum)
+  list(mode = as.numeric(mode_val), mode_frac = as.numeric(mode_frac))
+}
+
+# One row per polygon: coverage stats, weighted mean/sd per continuous layer, QA mode per QA layer.
+summarize_extract <- function(ex_dt, id_vec, layer_names, qa_names = character(), id_col = "parcel_id") {
+  stopifnot("ID" %in% names(ex_dt), "weight" %in% names(ex_dt))
+  stopifnot(length(id_vec) >= max(ex_dt$ID, na.rm = TRUE))
+
+  dt <- as.data.table(ex_dt)
+  dt[, (id_col) := id_vec[ID]]
+  wcol <- "weight"
+
+  cont_layers <- setdiff(layer_names, qa_names)
+  if (length(cont_layers) == 0) stop("No continuous layers to compute coverage stats.")
+  coverage_layer <- cont_layers[1]
+
+  sup <- dt[, {
+    s <- weighted_stats(get(coverage_layer), get(wcol))
+    list(n_valid = as.integer(s$n_valid), w_valid = as.numeric(s$w_valid),
+         sum_w2 = as.numeric(s$sum_w2), na_frac = as.numeric(s$na_frac))
+  }, by = c(id_col)]
+
+  cont_out <- dt[, {
+    out <- list()
+    for (nm in cont_layers) {
+      s <- weighted_stats(get(nm), get(wcol))
+      out[[paste0(nm, "_mean")]] <- as.numeric(s$mean)
+      out[[paste0(nm, "_sd")]] <- as.numeric(s$sd)
+    }
+    out
+  }, by = c(id_col)]
+
+  qa_out <- NULL
+  if (length(qa_names) > 0) {
+    qa_out <- dt[, {
+      out <- list()
+      for (q in qa_names) {
+        m <- weighted_mode_stats(get(q), get(wcol))
+        out[[paste0(q, "_mode")]] <- as.numeric(m$mode)
+        out[[paste0(q, "_mode_frac")]] <- as.numeric(m$mode_frac)
+      }
+      out
+    }, by = c(id_col)]
+  }
+
+  res <- merge(sup, cont_out, by = id_col, all = TRUE)
+  if (!is.null(qa_out)) res <- merge(res, qa_out, by = id_col, all = TRUE)
+  res[]
+}
+
diff --git a/modules/data.remote/inst/ccmmf/hls/tilewise_core.R b/modules/data.remote/inst/ccmmf/hls/tilewise_core.R
new file mode 100644
index 00000000000..4fdccac76c8
--- /dev/null
+++ b/modules/data.remote/inst/ccmmf/hls/tilewise_core.R
@@ -0,0 +1,411 @@
+# Shared tilewise orchestration for HLS-derived products (MSLSP, NDTI).
+# Implements prep-static, per-tile extract to tilepieces, combine to Parquet, and optional merge.
+#
+# Main inputs: product object (see Product interface at bottom of file).
+# Main outputs: tilepiece CSV.gz dirs and product-specific Parquet paths.
+# How to run: sourced from tilewise_*_driver.R; not a standalone CLI.
+# Workflow: core of monitoring workflow stages S2, S5 for raster summaries.
+
+suppressPackageStartupMessages({
+  library(sf)
+  library(data.table)
+  library(stringr)
+})
+
+#### Logging
+# Timestamped log to console and optional file. Driver calls tilewise_log_init()
+# to enable file sink; tw_log() works console-only if not set.
+
+.tw_state <- new.env(parent = emptyenv())
+.tw_state$log_con  <- NULL
+.tw_state$log_path <- NULL
+
+tilewise_log_init <- function(log_path) {
+  dir.create(dirname(log_path), recursive = TRUE, showWarnings = FALSE)
+  .tw_state$log_con  <- file(log_path, open = "at")
+  .tw_state$log_path <- log_path
+  do.call("on.exit", list(quote(tilewise_log_close()), add = TRUE), envir = parent.frame())
+  tw_log("INFO", "=== run started  pid=", Sys.getpid(), " ===")
+  message("[log] writing to: ", log_path)
+  invisible(log_path)
+}
+
+tilewise_log_close <- function() {
+  if (!is.null(.tw_state$log_con)) {
+    tw_log("INFO", "=== run finished ===")
+    close(.tw_state$log_con)
+    .tw_state$log_con  <- NULL
+    .tw_state$log_path <- NULL
+  }
+}
+
+tw_log <- function(level = "INFO", ...) {
+  msg  <- paste0(c(...), collapse = "")
+  ts   <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
+  line <- sprintf("%s [%-5s] %s", ts, level, msg)
+  message(line)
+  if (!is.null(.tw_state$log_con)) {
+    writeLines(line, .tw_state$log_con)
+    flush(.tw_state$log_con)
+  }
+}
+
+#### Generic helpers
+
+parcel_id_to_bucket <- function(parcel_ids, n_buckets) {
+  x <- suppressWarnings(as.integer(parcel_ids))
+  x[is.na(x)] <- 0L
+  (abs(x) %% as.integer(n_buckets)) + 1L
+}
+
+append_to_csv <- function(data, filepath) {
+  if (file.exists(filepath)) {
+    fwrite(data, filepath, append = TRUE, col.names = FALSE)
+  } else {
+    fwrite(data, filepath)
+  }
+}
+
+concat_gzipped_csvs <- function(input_files, output_file) {
+  input_files <- sort(input_files[file.exists(input_files)])
+  if (length(input_files) == 0) stop("No input files to combine")
+  if (length(input_files) == 1) {
+    cmd <- sprintf("zcat %s | gzip -c > %s", shQuote(input_files[1]), shQuote(output_file))
+  } else {
+    rest <- paste(shQuote(input_files[-1]), collapse = " ")
+    cmd <- sprintf(
+      "(zcat %s; for f in %s; do zcat \"$f\" | sed '1d'; done) | gzip -c > %s",
+      shQuote(input_files[1]), rest, shQuote(output_file)
+    )
+  }
+  if (system(cmd) != 0L) stop("Concatenation failed: ", output_file)
+  invisible(output_file)
+}
+
+# Build a list mapping tile_id -> row indices in prep$polys.
+# prep$polys must have a tile_ids list column (each element is a character vector
+# of tile IDs the parcel overlaps).
+build_tile_to_parcel_indices <- function(polys) {
+  tile_ids_list <- polys$tile_ids
+  rows  <- rep.int(seq_len(nrow(polys)), lengths(tile_ids_list))
+  tiles <- unlist(tile_ids_list, use.names = FALSE)
+  ok    <- !is.na(tiles) & nzchar(tiles)
+  split(rows[ok], tiles[ok])
+}
+
+sanitize_tile_id <- function(id) gsub("[^0-9A-Za-z]+", "_", id)
+
+#### tilewise_run (extract per tile and scene)
+
+tilewise_run <- function(prep, time_key, product, overwrite = FALSE, verbose = TRUE) {
+  year     <- prep$year
+  time_key <- as.integer(time_key)
+
+  scene_index_dt <- product$scene_index(year, time_key, verbose = verbose)
+  if (is.null(scene_index_dt) || nrow(scene_index_dt) == 0) {
+    if (verbose) tw_log("WARN", "no scenes for year=", year, " time_key=", time_key)
+    return(invisible(NULL))
+  }
+  tile_col <- product$scene_index_tile_col
+  if (!tile_col %in% names(scene_index_dt)) stop("scene_index must have column: ", tile_col)
+
+  tile_to_indices <- build_tile_to_parcel_indices(prep$polys)
+  tile_ids <- sort(names(tile_to_indices))
+
+  # Smoke-test: restrict to one tile via env var.
+  #   TILEWISE_ONE_TILE=1       -> first tile
+  #   TILEWISE_ONE_TILE=10SFF   -> specific tile
+  one_tile <- Sys.getenv("TILEWISE_ONE_TILE", "")
+  if (nzchar(one_tile) && length(tile_ids) > 0) {
+    one_tile_lower <- tolower(one_tile)
+    if (one_tile_lower %in% c("1", "true", "yes", "y", "first")) {
+      tile_ids <- tile_ids[1]
+    } else if (one_tile %in% tile_ids) {
+      tile_ids <- one_tile
+    } else {
+      stop("TILEWISE_ONE_TILE is set but tile not found in prep: ", one_tile)
+    }
+    if (verbose) tw_log("INFO", "TILEWISE_ONE_TILE -> ", tile_ids)
+  }
+
+  tilepieces_dir <- product$path_tilepieces(prep$out_dir, year, time_key)
+  dir.create(tilepieces_dir, recursive = TRUE, showWarnings = FALSE)
+
+  relevant_scene_rows <- scene_index_dt[get(tile_col) %in% tile_ids]
+  if (verbose) {
+    tw_log("INFO",
+      "year=", year, " time_key=", time_key,
+      " tiles=", length(tile_ids),
+      " relevant_scenes=", nrow(relevant_scene_rows)
+    )
+  }
+
+  # Per-tile timing: one row per processed tile, written to tilepieces_dir at the end.
+  timing_rows <- list()
+
+  write_empty_tilepiece <- function(tmp, gz) {
+    empty <- product$empty_tilepiece_schema()
+    fwrite(empty, tmp)
+    gz_status <- system2("gzip", c("-f", tmp))
+    if (gz_status != 0L) {
+      tw_log("WARN", "gzip failed for empty tilepiece ", tmp, "; keeping .csv")
+    } else {
+      renamed <- file.rename(paste0(tmp, ".gz"), gz)
+      if (!isTRUE(renamed)) tw_log("WARN", "rename failed for empty tilepiece ", tmp)
+    }
+  }
+
+  for (tile_id in tile_ids) {
+    parcel_indices <- tile_to_indices[[tile_id]]
+    if (length(parcel_indices) == 0) next
+
+    tile_safe  <- sanitize_tile_id(tile_id)
+    output_gz  <- file.path(tilepieces_dir, sprintf("tile=%s.csv.gz", tile_safe))
+    output_csv <- file.path(tilepieces_dir, sprintf("tile=%s.csv",    tile_safe))
+    if ((file.exists(output_gz) || file.exists(output_csv)) && !overwrite) next
+
+    output_tmp <- output_csv
+    if (file.exists(output_tmp)) file.remove(output_tmp)
+
+    tile_start  <- proc.time()[["elapsed"]]
+    start_ts    <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
+    n_rows      <- 0L
+
+    # Scenes for this tile - fetched before geometry so prepare_tile can use CRS.
+    # Use cur_tile (not tile_id) so data.table doesn't match the column of same name.
+    cur_tile <- tile_id
+    scenes_this_tile <- scene_index_dt[get(tile_col) == cur_tile]
+
+    if (nrow(scenes_this_tile) == 0) {
+      if (verbose) tw_log("INFO", "tile=", tile_id, " 0 scenes - writing empty tilepiece")
+      write_empty_tilepiece(output_tmp, output_gz)
+      timing_rows[[length(timing_rows) + 1]] <- list(
+        tile_id = tile_id, n_parcels = length(parcel_indices), n_scenes = 0L,
+        n_rows_written = 0L, status = "empty_no_scenes",
+        elapsed_sec = round(proc.time()[["elapsed"]] - tile_start, 1),
+        start_ts = start_ts, end_ts = format(Sys.time(), "%Y-%m-%d %H:%M:%S")
+      )
+      next
+    }
+
+    # Per-tile geometry context.
+    # If the product defines prepare_tile, call it (e.g. NDTI loads geometry
+    # lazily from the GPKG for just this tile's parcels, already reprojected).
+    # Otherwise fall back to slicing prep$polys (MSLSP loads all geometry upfront).
+    if (!is.null(product$prepare_tile)) {
+      parcel_ids_tile <- prep$polys$parcel_id[parcel_indices]
+      tile_parcels <- product$prepare_tile(prep, tile_id, parcel_ids_tile, scenes_this_tile)
+    } else {
+      tile_parcels <- prep$polys[parcel_indices, ]
+      tile_parcels$tile_ids <- replicate(nrow(tile_parcels), tile_id, simplify = FALSE)
+    }
+
+    if (is.null(tile_parcels) || nrow(tile_parcels) == 0) {
+      if (verbose) tw_log("WARN", "tile=", tile_id, " no parcel geometry - writing empty tilepiece")
+      write_empty_tilepiece(output_tmp, output_gz)
+      timing_rows[[length(timing_rows) + 1]] <- list(
+        tile_id = tile_id, n_parcels = length(parcel_indices), n_scenes = nrow(scenes_this_tile),
+        n_rows_written = 0L, status = "empty_no_geometry",
+        elapsed_sec = round(proc.time()[["elapsed"]] - tile_start, 1),
+        start_ts = start_ts, end_ts = format(Sys.time(), "%Y-%m-%d %H:%M:%S")
+      )
+      next
+    }
+
+    if (verbose) tw_log("INFO",
+      "tile=", tile_id,
+      " parcels=", nrow(tile_parcels),
+      " scenes=", nrow(scenes_this_tile))
+
+    tile_status <- "ok"
+    for (scene_idx in seq_len(nrow(scenes_this_tile))) {
+      scene_row <- scenes_this_tile[scene_idx]
+      result    <- tryCatch(
+        product$process_scene(prep, scene_row, tile_parcels, tile_id),
+        error = function(e) {
+          tw_log("ERROR", "tile=", tile_id, " scene=", scene_idx, " ", conditionMessage(e))
+          tile_status <<- "error"
+          NULL
+        }
+      )
+      if (!is.null(result) && nrow(result) > 0) {
+        result[, tile_id := tile_id]
+        append_to_csv(result, output_tmp)
+        n_rows <- n_rows + nrow(result)
+      }
+    }
+
+    if (file.exists(output_tmp)) {
+      gz_status <- system2("gzip", c("-f", output_tmp))
+      if (gz_status != 0L) {
+        tw_log("WARN", "gzip failed for ", output_tmp, "; keeping .csv and continuing")
+      } else {
+        renamed <- file.rename(paste0(output_tmp, ".gz"), output_gz)
+        if (!isTRUE(renamed)) {
+          tw_log("WARN", "rename to canonical .csv.gz failed for ", output_tmp)
+        }
+      }
+    } else {
+      write_empty_tilepiece(output_tmp, output_gz)
+    }
+
+    elapsed <- round(proc.time()[["elapsed"]] - tile_start, 1)
+    if (verbose) tw_log("TIME", "tile=", tile_id, " done  rows=", n_rows, " elapsed=", elapsed, "s")
+
+    timing_rows[[length(timing_rows) + 1]] <- list(
+      tile_id = tile_id, n_parcels = nrow(tile_parcels), n_scenes = nrow(scenes_this_tile),
+      n_rows_written = n_rows, status = tile_status,
+      elapsed_sec = elapsed,
+      start_ts = start_ts, end_ts = format(Sys.time(), "%Y-%m-%d %H:%M:%S")
+    )
+  }
+
+  # Write per-tile timing summary so slow/failed tiles are easy to spot.
+  if (length(timing_rows) > 0) {
+    timing_dt <- rbindlist(timing_rows)
+    timing_dt[, `:=`(year = year, time_key = time_key)]
+    timing_path <- file.path(tilepieces_dir, "_tile_timing.csv")
+    fwrite(timing_dt, timing_path)
+    if (verbose) tw_log("INFO", "tile timing written to: ", timing_path)
+  }
+
+  invisible(tilepieces_dir)
+}
+
+#### tilewise_combine (aggregate tilepieces)
+
+tilewise_combine <- function(prep, time_key, product, n_buckets = 256L,
+                             overwrite = FALSE, verbose = TRUE) {
+  year     <- prep$year
+  time_key <- as.integer(time_key)
+
+  # Products that define combine() bypass the generic bucket
+  # scatter/aggregate path entirely.
+  if (!is.null(product$combine)) {
+    return(product$combine(prep, time_key, overwrite = overwrite, verbose = verbose))
+  }
+
+  #### Generic bucket combine (products without custom combine)
+  tilepieces_dir <- product$path_tilepieces(prep$out_dir, year, time_key)
+  parts_dir      <- product$path_combine_parts(prep$out_dir, year, time_key)
+  tile_files     <- list.files(tilepieces_dir, "^tile=.*\\.csv(\\.gz)?$", full.names = TRUE)
+  if (length(tile_files) == 0) stop("No tilepieces in: ", tilepieces_dir)
+
+  dir.create(parts_dir, recursive = TRUE, showWarnings = FALSE)
+  done_marker <- file.path(parts_dir, "DONE.marker")
+  if (file.exists(done_marker) && !overwrite) {
+    if (verbose) tw_log("INFO", "[combine] already done: ", parts_dir)
+    return(invisible(parts_dir))
+  }
+
+  tmp_dir      <- file.path(parts_dir, "tmp_buckets")
+  dir.create(tmp_dir, recursive = TRUE, showWarnings = FALSE)
+  bucket_paths <- file.path(tmp_dir, sprintf("bucket=%03d.csv", seq_len(n_buckets)))
+  if (overwrite) for (p in bucket_paths) if (file.exists(p)) file.remove(p)
+
+  # Pass 1: scatter rows into buckets so all rows for one parcel land together.
+  if (verbose) tw_log("INFO", "[combine] pass1: scatter into ", n_buckets, " buckets")
+  for (f in tile_files) {
+    dt <- try(fread(f, showProgress = FALSE), silent = TRUE)
+    if (inherits(dt, "try-error") || nrow(dt) == 0) next
+    if (!product$validate_tilepiece(dt)) next
+    dt_scatter <- product$prepare_for_scatter(dt, n_buckets)
+    by_bucket  <- split(dt_scatter, dt_scatter$bucket)
+    for (bn in names(by_bucket)) {
+      append_to_csv(product$scatter_cols(by_bucket[[bn]]), bucket_paths[as.integer(bn)])
+    }
+  }
+
+  # Pass 2: aggregate each bucket independently (bounded memory).
+  if (verbose) tw_log("INFO", "[combine] pass2: aggregate buckets")
+  empty_part <- product$empty_part_schema(year)
+  for (b in seq_len(n_buckets)) {
+    part_file <- file.path(parts_dir, sprintf("part=%03d.csv.gz", b))
+    if (!file.exists(bucket_paths[b])) {
+      fwrite(empty_part, sub("\\.gz$", "", part_file))
+      system2("gzip", c("-f", sub("\\.gz$", "", part_file)))
+      next
+    }
+    dt <- fread(bucket_paths[b], showProgress = FALSE)
+    if (nrow(dt) == 0) {
+      fwrite(empty_part, sub("\\.gz$", "", part_file))
+      system2("gzip", c("-f", sub("\\.gz$", "", part_file)))
+      next
+    }
+    agg <- product$aggregate_bucket(dt, year)
+    fwrite(agg, sub("\\.gz$", "", part_file))
+    system2("gzip", c("-f", sub("\\.gz$", "", part_file)))
+  }
+  writeLines(c(
+    sprintf("year=%d", year),
+    sprintf("time_key=%d", time_key),
+    sprintf("n_buckets=%d", n_buckets)
+  ), done_marker)
+  unlink(tmp_dir, recursive = TRUE, force = TRUE)
+  invisible(parts_dir)
+}
+
+#### tilewise_merge (concatenate bucket parts)
+
+tilewise_merge <- function(prep, time_key, product, overwrite = FALSE, verbose = TRUE) {
+  year     <- prep$year
+  time_key <- as.integer(time_key)
+
+  # Products that define combine() already wrote the final output there.
+  if (!is.null(product$combine)) {
+    out_path <- product$path_final_output(prep$out_dir, year, time_key)
+    if (file.exists(out_path)) {
+      if (verbose) message("[merge] skipped - combine already produced: ", out_path)
+      return(invisible(out_path))
+    }
+    # Final output missing - run combine now.
+    if (verbose) message("[merge] final output missing, running combine")
+    return(product$combine(prep, time_key, overwrite = overwrite, verbose = verbose))
+  }
+
+  # Concatenate bucket parts into final output.
+  parts_dir  <- product$path_combine_parts(prep$out_dir, year, time_key)
+  part_files <- sort(list.files(parts_dir, "^part=[0-9]+\\.csv\\.gz$", full.names = TRUE))
+  if (length(part_files) == 0) stop("No part files in: ", parts_dir)
+  output_path <- product$path_final_output(prep$out_dir, year, time_key)
+  if (file.exists(output_path) && !overwrite) {
+    if (verbose) tw_log("INFO", "[merge] skip (exists): ", output_path)
+    return(invisible(output_path))
+  }
+  if (verbose) tw_log("INFO", "[merge] concatenating ", length(part_files), " parts -> ", output_path)
+  concat_gzipped_csvs(part_files, output_path)
+  invisible(output_path)
+}
+
+tilewise_prep_static <- function(year, product, ...) {
+  product$prep_static(year, ...)
+}
+
+#### Product interface
+# Required:
+#   product$prep_static(year, ...)
+#     -> list(year, polys, out_dir)
+#        polys must have columns: parcel_id, tile_ids (list of tile ID strings)
+#        polys may be an sf object (MSLSP) or a plain data.table (NDTI)
+#   product$scene_index(year, time_key, verbose) -> data.table with tile column
+#   product$scene_index_tile_col -> column name in scene_index result
+#   product$process_scene(prep, scene_row, tile_parcels, tile_id) -> data.table or NULL
+#   product$path_tilepieces(out_dir, year, time_key) -> directory path
+#   product$path_final_output(out_dir, year, time_key) -> file path
+#   product$empty_tilepiece_schema() -> zero-row data.table with correct columns
+#
+# Optional:
+#   product$prepare_tile(prep, tile_id, parcel_ids, scenes_this_tile)
+#     -> tile-specific context (e.g. sf geometry) passed to process_scene
+#        when NULL, framework slices prep$polys[parcel_indices, ]
+#   product$combine(prep, time_key, overwrite, verbose)
+#     -> full combine: read tilepieces, aggregate, write final output
+#        when NULL, framework uses generic bucket scatter/aggregate/merge
+#
+# Required only when combine is NULL (generic bucket path):
+#   product$path_combine_parts(out_dir, year, time_key) -> parts directory
+#   product$validate_tilepiece(dt) -> logical
+#   product$prepare_for_scatter(dt, n_buckets) -> dt with bucket column
+#   product$scatter_cols(dt) -> dt without bucket column
+#   product$aggregate_bucket(dt, year) -> aggregated dt
+#   product$empty_part_schema(year) -> zero-row data.table
diff --git a/modules/data.remote/inst/ccmmf/hls/tilewise_mslsp_driver.R b/modules/data.remote/inst/ccmmf/hls/tilewise_mslsp_driver.R
new file mode 100644
index 00000000000..64bd9c11a09
--- /dev/null
+++ b/modules/data.remote/inst/ccmmf/hls/tilewise_mslsp_driver.R
@@ -0,0 +1,72 @@
+#!/usr/bin/env Rscript
+# MSLSP tilewise driver: prep-static, extract, combine, or all for one year.
+# Uses tilewise_core plus MSLSP implementation and combine_mslsp_tilepieces.R.
+#
+# Main inputs: year, command prep-static|extract|combine|all, optional overwrite.
+# Main outputs: tilepiece CSV.gz under phenology/raw_mslsp_v4.1, then mslsp_year=Y.parquet.
+# How to run: Rscript tilewise_mslsp_driver.R <year> [prep-static|extract|combine|all] [overwrite]
+# Workflow: monitoring workflow stage S1-S5 for MSLSP (see monitoring_workflow_flowchart.mmd).
+
+script_dir <- if (length(file_arg <- commandArgs(trailingOnly = FALSE)[grepl("^--file=",
+                   commandArgs(trailingOnly = FALSE))])) {
+  dirname(sub("^--file=", "", file_arg[1]))
+} else "."
+
+# Arrow registers filesystem handlers lazily. Touch parquet before sf/terra/GDAL load.
+suppressPackageStartupMessages(library(arrow))
+local({
+  tmp <- tempfile(fileext = ".parquet")
+  on.exit(unlink(tmp))
+  arrow::write_parquet(data.frame(x = 1L), tmp)
+  arrow::read_parquet(tmp)
+})
+
+source(file.path(script_dir, "extract_summary_core.R"))
+source(file.path(script_dir, "tilewise_core.R"))
+source(file.path(script_dir, "tilewise_mslsp_implementation.R"))
+source(file.path(script_dir, "combine_mslsp_tilepieces.R"))
+
+product <- product_mslsp()
+
+args <- commandArgs(trailingOnly = TRUE)
+is_overwrite   <- function(x) tolower(x) %in% c("true", "t", "yes", "y", "overwrite")
+overwrite_flag <- any(sapply(args, is_overwrite))
+args           <- args[!sapply(args, is_overwrite)]
+
+if (length(args) < 1) {
+  stop("Usage: Rscript tilewise_mslsp_driver.R <year> [prep-static|extract|combine|all] [overwrite]")
+}
+
+year_arg   <- as.integer(args[1])
+command    <- if (length(args) >= 2 && nzchar(args[2])) tolower(args[2]) else "all"
+mslsp_time_key <- 1L
+
+ts      <- format(Sys.time(), "%Y%m%d_%H%M%S")
+log_dir <- file.path(mslsp_out_root, sprintf("year=%d", year_arg), "logs")
+tilewise_log_init(file.path(log_dir,
+  sprintf("mslsp_%s_%s.log", command, ts)))
+
+tw_log("INFO", "MSLSP driver  year=", year_arg, " command=", command,
+       " overwrite=", overwrite_flag,
+       " pid=", Sys.getpid(),
+       " SGE_TASK_ID=", Sys.getenv("SGE_TASK_ID", ""))
+
+if (command == "prep-static") {
+  tilewise_prep_static(year_arg, product)
+
+} else if (command == "extract") {
+  prep <- tilewise_prep_static(year_arg, product)
+  tilewise_run(prep, mslsp_time_key, product, overwrite = overwrite_flag)
+
+} else if (command == "combine") {
+  prep <- tilewise_prep_static(year_arg, product)
+  tilewise_combine(prep, mslsp_time_key, product, overwrite = overwrite_flag)
+
+} else if (command == "all") {
+  prep <- tilewise_prep_static(year_arg, product)
+  tilewise_run(prep, mslsp_time_key, product, overwrite = overwrite_flag)
+  tilewise_combine(prep, mslsp_time_key, product, overwrite = overwrite_flag)
+
+} else {
+  stop("Unknown command: ", command, ". Use: prep-static | extract | combine | all")
+}
diff --git a/modules/data.remote/inst/ccmmf/hls/tilewise_mslsp_implementation.R b/modules/data.remote/inst/ccmmf/hls/tilewise_mslsp_implementation.R
new file mode 100644
index 00000000000..87e22c67b98
--- /dev/null
+++ b/modules/data.remote/inst/ccmmf/hls/tilewise_mslsp_implementation.R
@@ -0,0 +1,525 @@
+# MSLSP product definition for the tilewise framework (prep, scenes, extract, combine).
+# Sourced after extract_summary_core.R and combine_mslsp_tilepieces.R by the MSLSP driver.
+#
+# Main inputs: env paths for LandIQ, MSLSP NetCDF roots, parcel-tile map RDS.
+# Main outputs: tilepieces and annual Parquet under phenology/raw_mslsp_v4.1.
+# How to run: sourced only; use tilewise_mslsp_driver.R for CLI.
+# Workflow: monitoring workflow MSLSP branch.
+
+#### Configuration
+mslsp_landiq_v4    <- Sys.getenv("CCMMF_LANDIQ_V4", "/projectnb/dietzelab/ccmmf/LandIQ-harmonized-v4.1")
+mslsp_management   <- Sys.getenv("CCMMF_MANAGEMENT", "/projectnb/dietzelab/ccmmf/management")
+mslsp_parcels_gpkg <- file.path(mslsp_landiq_v4, "parcels-consolidated.gpkg")
+mslsp_crops_parq   <- file.path(mslsp_landiq_v4, "crops_all_years.parq")
+mslsp_cropcode_lookup <- file.path(mslsp_management, "LandIQ_cropCode_lookup_table.csv")
+mslsp_legacy_dir   <- Sys.getenv("mslsp_legacy_dir", "/projectnb/dietzelab/ccmmf/HLS_data")
+mslsp_new_base     <- Sys.getenv("mslsp_new_base", "/projectnb/dietzelab/ccmmf/data_phen/output")
+mslsp_out_root     <- file.path(mslsp_management, "phenology/raw_mslsp_v4.1")
+mslsp_parcel_tilemap <- Sys.getenv(
+  "mslsp_parcel_tilemap",
+  file.path(mslsp_management, "hls_parcel_tile_map_v4.1_years=2016-2024.rds")
+)
+
+mslsp_metrics   <- c("OGI", "50PCGI", "OGMx", "Peak", "OGD", "50PCGD", "OGMn",
+                     "EVImax", "EVIamp", "EVIarea")
+mslsp_qa_cat    <- c("gupQA", "gdownQA")
+mslsp_year_fields_mode <- c("NumCycles")
+mslsp_year_fields_mean <- c("numObs")
+
+mslsp_state <- new.env(parent = emptyenv())
+
+#### Scene index helpers
+
+mslsp_nc_path <- function(tile_id, year) {
+  yr  <- as.integer(year)
+  candidates <- c(
+    file.path(mslsp_legacy_dir, paste0("MSLSP_", tile_id, "_", yr, ".nc")),
+    file.path(mslsp_new_base, tile_id, "phenoMetrics", paste0("MSLSP_", tile_id, "_", yr, ".nc"))
+  )
+  hit <- candidates[file.exists(candidates)]
+  if (length(hit) == 0) NA_character_ else hit[1]
+}
+
+mslsp_load_parcel_tilemap <- function(year) {
+  if (!file.exists(mslsp_parcel_tilemap)) return(NULL)
+  dt <- as.data.table(readRDS(mslsp_parcel_tilemap))
+  if (!all(c("parcel_id", "tileIDs", "year") %in% names(dt))) return(NULL)
+  yr <- as.integer(year)
+  dt <- dt[as.integer(year) == yr, .(parcel_id = as.character(parcel_id), tileIDs)]
+  setkey(dt, parcel_id)
+  dt
+}
+
+mslsp_scene_index <- function(year, time_key, verbose = TRUE) {
+  yr  <- as.integer(year)
+  key <- as.character(yr)
+  tiles <- if (exists(key, envir = mslsp_state, inherits = FALSE)) {
+    get(key, envir = mslsp_state, inherits = FALSE)
+  } else {
+    tilemap <- mslsp_load_parcel_tilemap(yr)
+    if (is.null(tilemap) || nrow(tilemap) == 0) character() else {
+      sort(unique(unlist(strsplit(tilemap$tileIDs, ",", fixed = TRUE), use.names = FALSE)))
+    }
+  }
+  if (length(tiles) == 0) return(data.table(tile_id = character(), path = character()))
+  paths <- vapply(tiles, mslsp_nc_path, character(1), year = yr)
+  ok    <- !is.na(paths) & file.exists(paths)
+  out   <- data.table(tile_id = as.character(tiles[ok]), path = paths[ok])
+  if (verbose) {
+    missing_tiles <- as.character(tiles[!ok])
+    if (length(missing_tiles) > 0) {
+      sample_tiles <- paste(head(sort(unique(missing_tiles)), 12), collapse = ",")
+      if (length(unique(missing_tiles)) > 12) sample_tiles <- paste0(sample_tiles, ",...")
+      msg <- paste0("[MSLSP] scene_index year=", yr,
+                    " missing_tile_nc=", length(unique(missing_tiles)),
+                    " sample=", sample_tiles)
+      if (exists("tw_log", mode = "function")) {
+        tw_log("WARN", msg)
+      } else {
+        message(msg)
+      }
+    }
+    if (nrow(out) > 0) message("[MSLSP] scene_index year=", yr, " tiles=", nrow(out))
+  }
+  out
+}
+
+#### Static prep
+# PFT from lookup (CLASS, SUBCLASS -> PFT) so subclass-level differences (e.g. T19 vs T28 woody) are correct.
+mslsp_prep_static_tilewise <- function(year, ...) {
+  yr <- as.integer(year)
+  sf::sf_use_s2(FALSE)
+
+  lookup   <- data.table::fread(mslsp_cropcode_lookup)
+  ag_pairs <- lookup[is_agricultural == TRUE,
+                     .(CLASS = trimws(CLASS), SUBCLASS = as.character(SUBCLASS), PFT)]
+
+  # Read once from the single parquet file and filter in-memory.
+  # This avoids Arrow dataset URI registration issues seen on SCC for some jobs.
+  pq_result <- as.data.table(arrow::read_parquet(
+    mslsp_crops_parq,
+    col_select = c("parcel_id", "CLASS", "SUBCLASS", "year")
+  ))
+  ca_crop <- pq_result[as.integer(year) == yr, .(parcel_id, CLASS, SUBCLASS)]
+  ca_crop[, CLASS    := trimws(as.character(CLASS))]
+  ca_crop[, SUBCLASS := as.character(SUBCLASS)]
+  # Inner join: filters to agricultural only AND attaches PFT
+  ca_crop <- merge(ca_crop, ag_pairs, by = c("CLASS", "SUBCLASS"))
+  crop_ids <- unique(as.character(ca_crop$parcel_id))
+  if (length(crop_ids) == 0) stop("No agricultural parcel_ids found for year ", yr)
+
+  tilemap <- mslsp_load_parcel_tilemap(yr)
+  if (is.null(tilemap)) {
+    stop(
+      "Parcel-tile map not found for MSLSP.\n",
+      "  Path: ", mslsp_parcel_tilemap, "\n",
+      "  Build it: Rscript scripts/hls/build_hls_parcel_tile_map.R 2016 2024 overwrite"
+    )
+  }
+  tile_sub <- tilemap[crop_ids, on = "parcel_id", nomatch = 0]
+  missing_map <- setdiff(crop_ids, tile_sub$parcel_id)
+  if (length(missing_map) > 0) {
+    message("[MSLSP prep] parcel_ids missing from tile map (dropped): ", length(missing_map))
+    crop_ids <- intersect(crop_ids, tile_sub$parcel_id)
+    tile_sub <- tilemap[crop_ids, on = "parcel_id", nomatch = 0]
+  }
+
+  # Load geometries from v4.1 GPKG in batches to avoid huge SQL IN clauses.
+  layer      <- sf::st_layers(mslsp_parcels_gpkg)$name[1]
+  ids        <- unique(tile_sub$parcel_id)
+  chunks     <- split(ids, ceiling(seq_along(ids) / 5000L))
+  geom_chunks <- lapply(chunks, function(x) {
+    esc <- gsub("'", "''", x, fixed = TRUE)
+    q   <- sprintf('SELECT * FROM "%s" WHERE parcel_id IN (%s)',
+                   layer, paste0("'", esc, "'", collapse = ","))
+    sf::st_read(mslsp_parcels_gpkg, query = q, quiet = TRUE)
+  })
+  polys <- do.call(rbind, geom_chunks)
+  polys$parcel_id <- as.character(polys$parcel_id)
+  polys <- polys[!sf::st_is_empty(sf::st_geometry(polys)), ]
+
+  missing_geom <- setdiff(ids, polys$parcel_id)
+  if (length(missing_geom) > 0) {
+    message("[MSLSP prep] parcel_ids missing geometry (dropped): ", length(missing_geom))
+    tile_sub <- tile_sub[!parcel_id %in% missing_geom]
+  }
+  tile_sub <- tile_sub[polys$parcel_id, on = "parcel_id", nomatch = 0]
+
+  # Attach tile_ids list column so tilewise_core can build the tile->parcel index.
+  polys$tile_ids <- strsplit(tile_sub$tileIDs, ",", fixed = TRUE)
+  active_tiles   <- sort(unique(unlist(polys$tile_ids, use.names = FALSE)))
+  mslsp_state[[as.character(yr)]] <- active_tiles
+
+  out_dir <- file.path(mslsp_out_root, paste0("year=", yr))
+  dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
+
+  list(year = yr, polys = polys, out_dir = out_dir, active_tiles = active_tiles)
+}
+
+#### Scene extraction
+mslsp_process_scene_tilewise <- function(prep, scene_row, parcels_this_tile, tile_id) {
+  log_skip <- function(level = "WARN", reason, extra = "") {
+    msg <- paste0("[MSLSP extract] tile=", tile_id,
+                  " reason=", reason,
+                  if (nzchar(extra)) paste0(" ", extra) else "")
+    if (exists("tw_log", mode = "function")) {
+      tw_log(level, msg)
+    } else {
+      message(msg)
+    }
+  }
+
+  if (!requireNamespace("exactextractr", quietly = TRUE)) {
+    stop("exactextractr is required for MSLSP extraction. Install with: install.packages('exactextractr')")
+  }
+  path <- scene_row$path[1]
+  if (!file.exists(path)) {
+    log_skip("WARN", "missing_scene_file", paste0("path=", path))
+    return(NULL)
+  }
+  yr <- prep$year
+
+  r_all <- try(terra::rast(path), silent = TRUE)
+  if (inherits(r_all, "try-error")) {
+    log_skip("WARN", "unreadable_scene_raster", paste0("path=", path))
+    return(NULL)
+  }
+
+  # Identify which layers are present in this NetCDF.
+  nms     <- names(r_all)
+  qa1     <- intersect(mslsp_qa_cat, nms)
+  qa2     <- intersect(paste0(mslsp_qa_cat, "_2"), nms)
+  yr_mode <- intersect(mslsp_year_fields_mode, nms)   # e.g. NumCycles
+  yr_mean <- intersect(mslsp_year_fields_mean, nms)   # e.g. numObs
+
+  cycle1_layers <- intersect(c(mslsp_metrics, qa1, yr_mode, yr_mean), nms)
+  cycle2_layers <- intersect(c(paste0(mslsp_metrics, "_2"), qa2), nms)
+  if (length(cycle1_layers) == 0 && length(cycle2_layers) == 0) {
+    log_skip("WARN", "no_expected_layers_in_scene", paste0("path=", path))
+    return(NULL)
+  }
+
+  # Reproject parcels to the raster CRS and crop to their bounding box.
+  # Both cycles share this single crop of the full-tile raster.
+  target_crs <- terra::crs(r_all)
+  parcels_tr <- sf::st_transform(parcels_this_tile, target_crs)
+  bbox_vect  <- terra::vect(sf::st_as_sfc(sf::st_bbox(parcels_tr)))
+  r_crop     <- try(terra::crop(r_all, bbox_vect), silent = TRUE)
+  if (inherits(r_crop, "try-error")) {
+    log_skip("WARN", "crop_extent_no_overlap", paste0("path=", path))
+    return(NULL)
+  }
+
+  pid_vec <- as.character(parcels_tr$parcel_id)
+
+  # Build an exactextractr per-polygon callback for a given set of layers.
+  #
+  # exactextractr calls this once per polygon with:
+  #   values    - data.frame with one column per raster layer, one row per pixel
+  #   cov_fracs - numeric vector of pixel coverage fractions (0-1)
+  #
+  # Arguments:
+  #   metric_cols - layer names aggregated by weighted mean + SD
+  #   mode_cols   - layer names aggregated by weighted mode + mode_frac
+  #   key_col     - single layer used to define "valid" pixels:
+  #                 a pixel is valid if it overlaps the polygon and key_col is not NA
+  #   col_rename  - function applied to a layer name before naming the output column;
+  #                 for cycle 2 this strips "_2" so both cycles share the same schema
+  #                 (e.g. "OGI_2" becomes "OGI", output is "OGI_mean" not "OGI_2_mean")
+  make_summarize_poly <- function(metric_cols, mode_cols, key_col, col_rename = identity) {
+    function(values, cov_fracs) {
+      pos         <- cov_fracs > 0
+      ok          <- pos & !is.na(values[[key_col]])
+      w_total     <- sum(cov_fracs[pos])
+      w_valid_key <- sum(cov_fracs[ok])
+
+      out <- list(
+        n_valid = sum(ok),
+        w_valid = w_valid_key,
+        sum_w2  = sum(cov_fracs[ok]^2),   # for n_eff = w_valid^2 / sum_w2 -> SE = sd / sqrt(n_eff)
+        na_frac = if (w_total > 0) 1 - w_valid_key / w_total else NA_real_
+      )
+
+      for (col in metric_cols) {
+        s    <- weighted_stats(values[[col]], cov_fracs)
+        base <- col_rename(col)
+        out[[paste0(base, "_mean")]] <- s$mean
+        out[[paste0(base, "_sd")]]   <- s$sd
+      }
+
+      for (col in mode_cols) {
+        m    <- weighted_mode_stats(values[[col]], cov_fracs)
+        base <- col_rename(col)
+        out[[paste0(base, "_mode")]]      <- m$mode
+        out[[paste0(base, "_mode_frac")]] <- m$mode_frac
+      }
+
+      # Return a 1-row data.frame so exactextractr combines as one row per polygon
+      # (returning a raw list causes wrong combination: one row per list element).
+      # check.names=FALSE is required: layer names like "50PCGI_mean" start with a digit
+      # and would otherwise be silently renamed to "X50PCGI_mean" by as.data.frame().
+      as.data.frame(out, stringsAsFactors = FALSE, check.names = FALSE)
+    }
+  }
+
+  out_all <- list()
+
+  # --- Cycle 1 ---
+  if (length(cycle1_layers) > 0) {
+    metric1 <- intersect(c(mslsp_metrics, yr_mean), cycle1_layers)
+    mode1   <- intersect(c(qa1, yr_mode), cycle1_layers)
+    key1    <- if (length(metric1) > 0) metric1[1] else mode1[1]
+    cb1     <- make_summarize_poly(metric1, mode1, key1)
+    res1    <- try(
+      exactextractr::exact_extract(r_crop[[cycle1_layers]], parcels_tr, fun = cb1, progress = FALSE),
+      silent = TRUE
+    )
+    if (inherits(res1, "try-error")) {
+      log_skip("ERROR", "exact_extract_cycle1_failed", paste0("path=", path))
+    }
+    if (!inherits(res1, "try-error") && !is.null(res1)) {
+      dt1 <- as.data.table(res1)
+      dt1[, c("parcel_id", "year", "cycle") := list(pid_vec, yr, 1L)]
+      out_all[[length(out_all) + 1]] <- dt1
+    }
+  }
+
+  # --- Cycle 2 ---
+  # col_rename strips the trailing "_2" from layer names so output columns match the
+  # cycle 1 schema. e.g. "OGI_2" -> "OGI_mean", "gupQA_2" -> "gupQA_mode".
+  # The `cycle` column then distinguishes the two.
+  if (length(cycle2_layers) > 0) {
+    strip2  <- function(x) sub("_2$", "", x)
+    metric2 <- intersect(paste0(mslsp_metrics, "_2"), cycle2_layers)
+    mode2   <- intersect(qa2, cycle2_layers)
+    key2    <- if (length(metric2) > 0) metric2[1] else mode2[1]
+    cb2     <- make_summarize_poly(metric2, mode2, key2, col_rename = strip2)
+    res2    <- try(
+      exactextractr::exact_extract(r_crop[[cycle2_layers]], parcels_tr, fun = cb2, progress = FALSE),
+      silent = TRUE
+    )
+    if (inherits(res2, "try-error")) {
+      log_skip("ERROR", "exact_extract_cycle2_failed", paste0("path=", path))
+    }
+    if (!inherits(res2, "try-error") && !is.null(res2)) {
+      dt2 <- as.data.table(res2)
+      dt2[, c("parcel_id", "year", "cycle") := list(pid_vec, yr, 2L)]
+      out_all[[length(out_all) + 1]] <- dt2
+    }
+  }
+
+  if (length(out_all) == 0) {
+    log_skip("WARN", "no_cycle_outputs_written", paste0("path=", path))
+    return(NULL)
+  }
+  out <- rbindlist(out_all, fill = TRUE)
+  out[, parcel_id := as.character(parcel_id)]
+
+  # Harmonize EVI scaling across vintages of MSLSP NetCDF files.
+  #
+  # Why this is needed:
+  # - 2018-2019 files (legacy path) encode EVI scaling with NetCDF attributes
+  #   scale_factor/add_offset, which terra typically applies on read.
+  # - 2020+ files (new path) often encode equivalent information as scale/offset.
+  #   In this workflow, those can come through as raw integer-like values
+  #   (~0..10000 for EVImax/EVIamp, ~0..32766 for EVIarea).
+  #
+  # Without harmonization, different years end up on different numeric scales
+  # and downstream matching/QC behaves inconsistently.
+  #
+  # Strategy:
+  # - Detect raw-like magnitude in extracted parcel summaries.
+  # - Apply scaling exactly once only when needed.
+  maybe_scale_evi <- function(dt) {
+    evi_cols <- intersect(c("EVImax_mean", "EVIamp_mean", "EVIarea_mean"), names(dt))
+    if (length(evi_cols) == 0) return(dt)
+    med_abs <- vapply(evi_cols, function(cc) {
+      v <- suppressWarnings(as.numeric(dt[[cc]]))
+      stats::median(abs(v), na.rm = TRUE)
+    }, numeric(1))
+    # Physical EVI values should be O(1), not O(1e3-1e4).
+    needs_scale <- any(is.finite(med_abs[c("EVImax_mean", "EVIamp_mean")]) &
+                         med_abs[c("EVImax_mean", "EVIamp_mean")] > 10, na.rm = TRUE)
+    if (!isTRUE(needs_scale)) return(dt)
+
+    scale_f <- c(EVImax = 1e-4, EVIamp = 1e-4, EVIarea = 1e-2)
+    for (nm in names(scale_f)) {
+      mcol <- paste0(nm, "_mean")
+      scol <- paste0(nm, "_sd")
+      if (mcol %in% names(dt)) dt[[mcol]] <- suppressWarnings(as.numeric(dt[[mcol]])) * scale_f[[nm]]
+      if (scol %in% names(dt)) dt[[scol]] <- suppressWarnings(as.numeric(dt[[scol]])) * scale_f[[nm]]
+    }
+    if (exists("tw_log", mode = "function")) {
+      tw_log("WARN", "[MSLSP extract] tile=", tile_id, " applied EVI scale harmonization (raw-like magnitude detected)")
+    }
+    dt
+  }
+  out <- maybe_scale_evi(out)
+
+  out[]
+}
+
+# =============================================================================
+# Combine helpers (used by combine_mslsp_tilepieces.R and the bucket fallback)
+# =============================================================================
+
+# Core aggregation logic, shared by mslsp_combine and the bucket fallback.
+# Aggregates by (parcel_id, year, cycle) using area-weighted statistics.
+mslsp_aggregate_tilepieces <- function(dt, year) {
+  key <- c("parcel_id", "year", "cycle")
+  if (!all(key %in% names(dt))) return(dt)
+  dt[, year := as.integer(year)]
+
+  safe_weighted_mean <- function(x, w) {
+    ok <- is.finite(x) & is.finite(w) & w > 0
+    if (!any(ok)) return(NA_real_)
+    sum(w[ok] * x[ok]) / sum(w[ok])
+  }
+  safe_weighted_sd <- function(mu, sd, w) {
+    ok <- is.finite(mu) & is.finite(sd) & is.finite(w) & w > 0
+    if (!any(ok)) return(NA_real_)
+    ww <- w[ok]; mm <- mu[ok]; ss <- sd[ok]
+    m_all   <- sum(ww * mm) / sum(ww)
+    var_all <- sum(ww * (ss^2 + mm^2)) / sum(ww) - m_all^2
+    sqrt(pmax(0, var_all))
+  }
+  weighted_mode <- function(x, w) {
+    ok <- !is.na(x) & is.finite(w) & w > 0
+    if (!any(ok)) return(NA_real_)
+    dd <- data.table(x = x[ok], w = w[ok])[, .(w = sum(w)), by = x][order(-w)]
+    as.numeric(dd$x[1])
+  }
+  weighted_mode_frac <- function(x, mode_val, w) {
+    ok <- !is.na(x) & is.finite(w) & w > 0
+    if (!any(ok) || is.na(mode_val)) return(NA_real_)
+    denom <- sum(w[ok])
+    if (!is.finite(denom) || denom <= 0) return(NA_real_)
+    sum(w[ok & x == mode_val]) / denom
+  }
+
+  agg <- dt[, {
+    w <- as.numeric(w_valid)
+    out <- list(
+      n_valid = as.integer(sum(as.integer(n_valid), na.rm = TRUE)),
+      w_valid = as.numeric(sum(w, na.rm = TRUE)),
+      sum_w2  = as.numeric(sum(as.numeric(sum_w2), na.rm = TRUE)),
+      na_frac = safe_weighted_mean(as.numeric(na_frac), w)
+    )
+    for (m in mslsp_metrics) {
+      mcol <- paste0(m, "_mean"); scol <- paste0(m, "_sd")
+      mu <- if (mcol %in% names(.SD)) as.numeric(get(mcol)) else rep(NA_real_, .N)
+      sd <- if (scol %in% names(.SD)) as.numeric(get(scol)) else rep(NA_real_, .N)
+      out[[mcol]] <- safe_weighted_mean(mu, w)
+      out[[scol]] <- safe_weighted_sd(mu, sd, w)
+    }
+    for (q in c(mslsp_qa_cat, mslsp_year_fields_mode)) {
+      qcol  <- paste0(q, "_mode"); fqcol <- paste0(q, "_mode_frac")
+      qx    <- if (qcol %in% names(.SD)) as.numeric(get(qcol)) else rep(NA_real_, .N)
+      qm    <- weighted_mode(qx, w)
+      out[[qcol]]  <- qm
+      out[[fqcol]] <- weighted_mode_frac(qx, qm, w)
+    }
+    for (f in mslsp_year_fields_mean) {
+      mcol <- paste0(f, "_mean"); scol <- paste0(f, "_sd")
+      mu   <- if (mcol %in% names(.SD)) as.numeric(get(mcol)) else rep(NA_real_, .N)
+      sd   <- if (scol %in% names(.SD)) as.numeric(get(scol)) else rep(NA_real_, .N)
+      out[[mcol]] <- safe_weighted_mean(mu, w)
+      out[[scol]] <- safe_weighted_sd(mu, sd, w)
+    }
+    out
+  }, by = key]
+  setcolorder(agg, c(key, setdiff(names(agg), key)))
+  agg
+}
+
+# =============================================================================
+# Product object
+# =============================================================================
+
+product_mslsp <- function() {
+  if (!exists("weighted_stats", mode = "function")) {
+    stop("Source extract_summary_core.R first (provides weighted_stats, weighted_mode_stats)")
+  }
+  if (!exists("mslsp_combine", mode = "function")) {
+    stop("Source combine_mslsp_tilepieces.R first (provides mslsp_combine)")
+  }
+  list(
+    prep_static          = mslsp_prep_static_tilewise,
+    scene_index          = mslsp_scene_index,
+    scene_index_tile_col = "tile_id",
+    process_scene        = mslsp_process_scene_tilewise,
+
+    # MSLSP loads all geometry upfront in prep_static, so no prepare_tile needed.
+    # tilewise_core will slice prep$polys[parcel_indices, ] per tile.
+    prepare_tile = NULL,
+
+    combine = mslsp_combine,
+
+    path_tilepieces   = function(out_dir, year, time_key) {
+      file.path(out_dir, sprintf("tilepieces_year=%d", year))
+    },
+    path_final_output = function(out_dir, year, time_key) {
+      file.path(out_dir, sprintf("mslsp_year=%d.parquet", year))
+    },
+    # path_combine_parts not used (combine handles output directly)
+    path_combine_parts = function(out_dir, year, time_key) NULL,
+
+    validate_tilepiece = function(dt) {
+      req      <- c("parcel_id", "year", "cycle", "n_valid", "w_valid")
+      has_req  <- all(req %in% names(dt))
+      has_met  <- any(paste0(mslsp_metrics, "_mean") %in% names(dt))
+      has_req && has_met
+    },
+
+    # Bucket fallback (used only when combine is not available).
+    prepare_for_scatter = function(dt, n_buckets) {
+      dt[, bucket := parcel_id_to_bucket(parcel_id, n_buckets)]
+      dt
+    },
+    scatter_cols = function(dt) {
+      cols <- setdiff(names(dt), "bucket")
+      dt[, ..cols]
+    },
+    aggregate_bucket = function(dt, year) mslsp_aggregate_tilepieces(dt, year),
+
+    empty_tilepiece_schema = function() {
+      base <- data.table(
+        parcel_id = character(), year = integer(), cycle = integer(),
+        n_valid = integer(), w_valid = double(), sum_w2 = double(), na_frac = double()
+      )
+      for (m in mslsp_metrics) {
+        base[[paste0(m, "_mean")]] <- double()
+        base[[paste0(m, "_sd")]]   <- double()
+      }
+      for (q in c(mslsp_qa_cat, mslsp_year_fields_mode)) {
+        base[[paste0(q, "_mode")]]      <- double()
+        base[[paste0(q, "_mode_frac")]] <- double()
+      }
+      for (f in mslsp_year_fields_mean) {
+        base[[paste0(f, "_mean")]] <- double()
+        base[[paste0(f, "_sd")]]   <- double()
+      }
+      base[, tile_id := character()]
+      base
+    },
+    empty_part_schema = function(year) {
+      base <- data.table(
+        parcel_id = character(), year = integer(), cycle = integer(),
+        n_valid = integer(), w_valid = double(), sum_w2 = double(), na_frac = double()
+      )
+      for (m in mslsp_metrics) {
+        base[[paste0(m, "_mean")]] <- double()
+        base[[paste0(m, "_sd")]]   <- double()
+      }
+      for (q in c(mslsp_qa_cat, mslsp_year_fields_mode)) {
+        base[[paste0(q, "_mode")]]      <- double()
+        base[[paste0(q, "_mode_frac")]] <- double()
+      }
+      for (f in mslsp_year_fields_mean) {
+        base[[paste0(f, "_mean")]] <- double()
+        base[[paste0(f, "_sd")]]   <- double()
+      }
+      base
+    }
+  )
+}
diff --git a/modules/data.remote/inst/ccmmf/hls/tilewise_ndti_driver.R b/modules/data.remote/inst/ccmmf/hls/tilewise_ndti_driver.R
new file mode 100644
index 00000000000..566f9175682
--- /dev/null
+++ b/modules/data.remote/inst/ccmmf/hls/tilewise_ndti_driver.R
@@ -0,0 +1,88 @@
+#!/usr/bin/env Rscript
+# NDTI tilewise driver: prep-static, extract, combine, or all for one year and month(s).
+# Uses tilewise_core and NDTI implementation; writes under tillage/ndti_v4.1.
+#
+# Main inputs: year, command, month integers 1-12, optional overwrite.
+# Main outputs: tilepiece CSV.gz and ndti_year=Y_month=MM.parquet per month.
+# How to run: Rscript tilewise_ndti_driver.R <year> <command> [months ...] [overwrite]
+# Workflow: monitoring workflow NDTI branch (see monitoring_workflow_flowchart.mmd).
+
+script_dir <- if (length(file_arg <- commandArgs(trailingOnly = FALSE)[grepl("^--file=",
+                   commandArgs(trailingOnly = FALSE))])) {
+  dirname(sub("^--file=", "", file_arg[1]))
+} else "."
+
+suppressPackageStartupMessages(library(arrow))
+local({
+  tmp <- tempfile(fileext = ".parquet")
+  on.exit(unlink(tmp))
+  arrow::write_parquet(data.frame(x = 1L), tmp)
+  arrow::read_parquet(tmp)
+})
+
+source(file.path(script_dir, "tilewise_ndti_implementation.R"))
+source(file.path(script_dir, "combine_ndti_tilepieces.R"))
+source(file.path(script_dir, "tilewise_core.R"))
+
+path_ndti_output <- file.path(
+  Sys.getenv("CCMMF_MANAGEMENT", "/projectnb/dietzelab/ccmmf/management"),
+  "tillage/ndti_v4.1"
+)
+product <- product_ndti()
+
+args <- commandArgs(trailingOnly = TRUE)
+is_overwrite   <- function(x) tolower(x) %in% c("true", "t", "yes", "y", "overwrite")
+overwrite_flag <- any(sapply(args, is_overwrite))
+args           <- args[!sapply(args, is_overwrite)]
+
+if (length(args) < 2) {
+  stop("Usage: Rscript tilewise_ndti_driver.R <year> <prep-static|extract|combine|all> [month ...] [overwrite]")
+}
+
+year_arg <- as.integer(args[1])
+command  <- tolower(args[2])
+
+months_from <- function(x) {
+  vals <- suppressWarnings(as.integer(x))
+  unique(sort(vals[!is.na(vals) & vals >= 1 & vals <= 12]))
+}
+
+months_str <- paste(months_from(args[-(1:2)]), collapse = "-")
+if (!nzchar(months_str)) months_str <- "prep"
+ts      <- format(Sys.time(), "%Y%m%d_%H%M%S")
+log_dir <- file.path(path_ndti_output, sprintf("year=%d", year_arg), "logs")
+tilewise_log_init(file.path(log_dir,
+  sprintf("ndti_%s_m%s_%s.log", command, months_str, ts)))
+
+tw_log("INFO", "NDTI driver  year=", year_arg, " command=", command,
+       " overwrite=", overwrite_flag,
+       " pid=", Sys.getpid(),
+       " SGE_TASK_ID=", Sys.getenv("SGE_TASK_ID", ""))
+
+if (command == "prep-static") {
+  tilewise_prep_static(year_arg, product, overwrite = overwrite_flag)
+
+} else if (command == "extract") {
+  months <- months_from(args[-(1:2)])
+  if (length(months) == 0) stop("Usage: ... extract <month> [month ...] [overwrite]")
+  prep <- tilewise_prep_static(year_arg, product, overwrite = overwrite_flag)
+  for (m in months) tilewise_run(prep, m, product, overwrite = overwrite_flag)
+
+} else if (command == "combine") {
+  months <- months_from(args[-(1:2)])
+  if (length(months) == 0) stop("Usage: ... combine <month> [month ...] [overwrite]")
+  prep <- tilewise_prep_static(year_arg, product)
+  for (m in months) tilewise_combine(prep, m, product, overwrite = overwrite_flag)
+
+} else if (command == "all") {
+  months <- months_from(args[-(1:2)])
+  if (length(months) == 0) stop("Usage: ... all <month> [month ...] [overwrite]")
+  prep <- tilewise_prep_static(year_arg, product, overwrite = overwrite_flag)
+  for (m in months) {
+    tilewise_run(prep, m, product, overwrite = overwrite_flag)
+    tilewise_combine(prep, m, product, overwrite = overwrite_flag)
+  }
+
+} else {
+  stop("Unknown command: ", command, ". Use: prep-static | extract | combine | all")
+}
diff --git a/modules/data.remote/inst/ccmmf/hls/tilewise_ndti_implementation.R b/modules/data.remote/inst/ccmmf/hls/tilewise_ndti_implementation.R
new file mode 100644
index 00000000000..490788e6363
--- /dev/null
+++ b/modules/data.remote/inst/ccmmf/hls/tilewise_ndti_implementation.R
@@ -0,0 +1,361 @@
+# NDTI product for the tilewise framework (HLS bands, Fmask, per-parcel NDTI).
+# Weighted stats: n_eff = w_valid^2/sum_w2; SE_weighted = ndti_sd/sqrt(n_eff).
+#
+# Main inputs: HLSL/HLSS roots, LandIQ paths, parcel-tile map (env).
+# Main outputs: tilepieces and monthly Parquet under tillage/ndti_v4.1.
+# How to run: sourced from tilewise_ndti_driver.R.
+# Workflow: monitoring workflow NDTI branch.
+
+suppressPackageStartupMessages({
+  library(sf)
+  library(terra)
+  library(data.table)
+  library(stringr)
+  library(arrow)
+  library(dplyr)
+  library(exactextractr)
+})
+sf::sf_use_s2(FALSE)
+
+terra::terraOptions(threads = max(1L, suppressWarnings(
+  as.integer(Sys.getenv("NDTI_TERRA_THREADS", "8"))
+)))
+
+#### Configuration
+ndti_management   <- Sys.getenv("CCMMF_MANAGEMENT", "/projectnb/dietzelab/ccmmf/management")
+ndti_landiq_v4    <- Sys.getenv("CCMMF_LANDIQ_V4", "/projectnb/dietzelab/ccmmf/LandIQ-harmonized-v4.1")
+ndti_parcels_gpkg <- file.path(ndti_landiq_v4, "parcels-consolidated.gpkg")
+ndti_crops_parq   <- file.path(ndti_landiq_v4, "crops_all_years.parq")
+ndti_cropcode_csv <- file.path(ndti_management, "LandIQ_cropCode_lookup_table.csv")
+ndti_hlsl_base    <- Sys.getenv("HLSL_BASE", "/projectnb/dietzelab/XinyuanJi/State_of_California_HLSL")
+ndti_hlss_base    <- Sys.getenv("HLSS_BASE", "/projectnb/dietzelab/XinyuanJi/State_of_California_HLSS")
+ndti_out_root     <- file.path(ndti_management, "tillage/ndti_v4.1")
+ndti_parcel_tilemap <- Sys.getenv(
+  "NDTI_PARCEL_TILEMAP",
+  file.path(ndti_management, "hls_parcel_tile_map_v4.1_years=2016-2024.rds")
+)
+
+#### Path helpers
+
+# Intermediate per-tile CSV.gz files (one per tile per month)
+path_tilepieces <- function(output_dir, year, month) {
+  file.path(output_dir, sprintf("tilepieces_year=%d_month=%02d", year, month))
+}
+
+# Final combined output: one parquet per month inside year= directory.
+# All monthly files form a Hive-partitioned dataset: arrow::open_dataset(ndti_out_root)
+path_monthly_output <- function(output_dir, year, month) {
+  file.path(output_dir, sprintf("ndti_year=%d_month=%02d.parquet", year, month))
+}
+
+#### Small utilities
+
+# Parse MGRS tile ID from HLS filename (e.g. "HLS.L30.T10SFF...." -> "10SFF")
+extract_tile_id_from_filename <- function(filenames) {
+  matched <- str_extract(filenames, "T[0-9A-Z]{5}\\.")
+  result  <- sub("^T|\\.$", "", matched)
+  result[is.na(matched)] <- NA_character_
+  result
+}
+
+sanitize_tile_id_for_filename <- function(tile_id) gsub("[^0-9A-Za-z]+", "_", tile_id)
+
+# HLS filenames carry a trailing dot on the tile ID ("10SFF."); strip for matching
+normalize_tile_id <- function(x) sub("\\.+$", "", trimws(as.character(x)))
+
+# Fmask bit flags: cloud=bit1, cloud shadow=bit3, snow=bit4
+is_fmask_bad_pixel <- function(fmask_values) {
+  (bitwAnd(fmask_values, 2L)  != 0) |
+  (bitwAnd(fmask_values, 8L)  != 0) |
+  (bitwAnd(fmask_values, 16L) != 0)
+}
+
+# Append rows to a CSV, writing header only on the first write
+append_to_csv <- function(data, filepath) {
+  if (file.exists(filepath)) {
+    fwrite(data, filepath, append = TRUE, col.names = FALSE)
+  } else {
+    fwrite(data, filepath)
+  }
+}
+
+# =============================================================================
+# Scene index
+# =============================================================================
+
+# Build a table of all HLS scenes (date x sensor x tile) for a given year-month.
+build_scene_index <- function(year, month, verbose = TRUE) {
+  month_start <- as.Date(paste(year, month, 1, sep = "-"))
+  month_end   <- seq(month_start, by = "month", length.out = 2L)[2L] - 1L
+
+  list_scenes <- function(dir_path, band_pattern, sensor_name) {
+    if (!dir.exists(dir_path)) return(data.table())
+    files   <- list.files(dir_path, pattern = band_pattern, full.names = TRUE)
+    if (length(files) == 0) return(data.table())
+    doy_str <- str_extract(basename(files), "\\d{7}")
+    dates   <- as.Date(doy_str, "%Y%j")
+    keep    <- !is.na(dates) & dates >= month_start & dates <= month_end
+    if (!any(keep)) return(data.table())
+    data.table(
+      date    = dates[keep],
+      sensor  = sensor_name,
+      tile_id = extract_tile_id_from_filename(basename(files)[keep]),
+      path    = files[keep]
+    )
+  }
+
+  scenes <- rbindlist(list(
+    list_scenes(file.path(ndti_hlsl_base, year), ".*B06.*\\.tif$", "HLSL"),
+    list_scenes(file.path(ndti_hlss_base, year), ".*B11.*\\.tif$", "HLSS")
+  ))
+  scenes <- scenes[!is.na(tile_id)]
+  scenes[, tile_id := normalize_tile_id(tile_id)]
+  if (verbose) message("[scene] ", nrow(scenes), " scenes for ", year, "-", sprintf("%02d", month))
+  scenes
+}
+
+#### Geometry loading
+load_parcel_geometries <- function(parcel_ids) {
+  parcel_ids <- unique(as.character(parcel_ids))
+  layer_name <- st_layers(ndti_parcels_gpkg)$name[1]
+  chunks <- split(parcel_ids, ceiling(seq_along(parcel_ids) / 5000L))
+  out <- lapply(chunks, function(chunk) {
+    ids_sql <- paste0("'", gsub("'", "''", chunk, fixed = TRUE), "'", collapse = ",")
+    st_read(ndti_parcels_gpkg,
+            query = sprintf('SELECT * FROM "%s" WHERE parcel_id IN (%s)', layer_name, ids_sql),
+            quiet = TRUE)
+  })
+  parcels <- do.call(rbind, out)
+  parcels <- st_zm(parcels, drop = TRUE, what = "ZM")
+  parcels[!st_is_empty(st_geometry(parcels)), ]
+}
+
+load_parcel_tilemap <- function(year = NULL) {
+  if (!file.exists(ndti_parcel_tilemap)) return(NULL)
+  dt <- as.data.table(readRDS(ndti_parcel_tilemap))
+  if ("year" %in% names(dt)) {
+    if (is.null(year)) return(NULL)
+    dt <- dt[year == as.integer(year)][, year := NULL]
+  }
+  if (!all(c("parcel_id", "tileIDs") %in% names(dt))) return(NULL)
+  dt[, parcel_id := as.character(parcel_id)]
+  dt <- dt[, .(tileIDs = paste(sort(unique(unlist(
+    strsplit(paste(tileIDs, collapse = ","), ",", fixed = TRUE)
+  ))), collapse = ",")), by = parcel_id]
+  setkey(dt, parcel_id)
+  dt
+}
+
+#### Static prep (cached per year)
+ndti_prep_static_tilewise <- function(year, overwrite = FALSE, verbose = TRUE) {
+  yr         <- as.integer(year)
+  output_dir <- file.path(ndti_out_root, paste0("year=", yr))
+  cache_path <- file.path(output_dir, sprintf("ndti_prep_static_year=%d.rds", yr))
+
+  if (file.exists(cache_path) && !overwrite) {
+    if (verbose) message("[prep] using cache: ", cache_path)
+    prep <- readRDS(cache_path)
+    if (!is.null(prep$polys)) return(prep)
+  } else {
+    lookup   <- fread(ndti_cropcode_csv)
+    ag_pairs <- unique(lookup[is_agricultural == TRUE,
+                              .(CLASS = trimws(CLASS), SUBCLASS = as.character(SUBCLASS))])
+    ag_classes_filter <- unique(ag_pairs$CLASS)
+
+    pq_result <- arrow::open_dataset(ndti_crops_parq) |>
+      dplyr::filter(year == !!yr, CLASS %in% !!ag_classes_filter) |>
+      dplyr::select(parcel_id, CLASS, SUBCLASS) |>
+      dplyr::collect()
+    ca_crop <- as.data.table(pq_result)
+    ca_crop[, CLASS    := trimws(as.character(CLASS))]
+    ca_crop[, SUBCLASS := as.character(SUBCLASS)]
+    ca_crop    <- merge(ca_crop, ag_pairs, by = c("CLASS", "SUBCLASS"))
+    parcel_ids <- unique(as.character(ca_crop$parcel_id))
+    if (length(parcel_ids) == 0) stop("No agricultural parcel_ids found for year ", yr)
+    if (verbose) message("[prep] agricultural parcels: ", length(parcel_ids))
+
+    tilemap <- load_parcel_tilemap(yr)
+    if (is.null(tilemap)) stop(
+      "Parcel-tile map not found: ", ndti_parcel_tilemap, "\n",
+      "Build it: Rscript scripts/hls/build_hls_parcel_tile_map.R 2016 2024 overwrite"
+    )
+
+    # tile_to_parcel_ids: kept for reference; polys drives tilewise_core
+    tile_to_parcel_ids <- local({
+      pid     <- unique(as.character(parcel_ids))
+      map_sub <- tilemap[pid, on = "parcel_id", nomatch = 0]
+      map_sub <- unique(map_sub, by = "parcel_id")
+      out <- list()
+      for (i in seq_len(nrow(map_sub))) {
+        for (tile in strsplit(map_sub$tileIDs[i], ",", fixed = TRUE)[[1]]) {
+          if (nzchar(tile)) out[[tile]] <- c(out[[tile]], map_sub$parcel_id[i])
+        }
+      }
+      out
+    })
+
+    covered   <- unique(unlist(tile_to_parcel_ids, use.names = FALSE))
+    n_missing <- length(setdiff(parcel_ids, covered))
+    if (n_missing > 0)
+      message("[prep] ", n_missing, " parcels missing from tile map (dropped) - rebuild map if large")
+
+    dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
+    prep <- list(
+      year               = yr,
+      out_dir            = output_dir,
+      tile_to_parcel_ids = tile_to_parcel_ids,
+      base_dirs          = list(HLSL = ndti_hlsl_base, HLSS = ndti_hlss_base),
+      band_other         = list(HLSL = c("B06", "B07"), HLSS = c("B11", "B12")),
+      fmask_suffix       = list(HLSL = "B06.tif$",     HLSS = "B11.tif$")
+    )
+    saveRDS(prep, cache_path)
+    if (verbose) message("[prep] saved: ", cache_path)
+  }
+
+  # Build polys data.table (parcel_id + tile_ids list-column) for tilewise_core.
+  # Geometry is NOT loaded here; it is loaded lazily per-tile in prepare_tile().
+  tile_ids <- names(prep$tile_to_parcel_ids)
+  parcel_tile <- rbindlist(lapply(tile_ids, function(tid) {
+    ids <- as.character(prep$tile_to_parcel_ids[[tid]])
+    if (length(ids) == 0) return(NULL)
+    data.table(parcel_id = ids, tile_id = tid)
+  }), use.names = TRUE, fill = TRUE)
+
+  if (is.null(parcel_tile) || nrow(parcel_tile) == 0) {
+    prep$polys <- data.table(parcel_id = character(), tile_ids = list())
+  } else {
+    prep$polys <- parcel_tile[, .(tile_ids = list(sort(unique(tile_id)))), by = parcel_id]
+  }
+  prep
+}
+
+#### Scene extraction
+# parcels_sf must be reprojected to raster CRS.
+extract_ndti_scene <- function(band1_path, band2_path, fmask_path,
+                               parcels_sf, parcel_ids, scene_date) {
+  bbox_vect <- terra::vect(sf::st_as_sfc(sf::st_bbox(parcels_sf)))
+  band1 <- try(terra::crop(terra::rast(band1_path), bbox_vect), silent = TRUE)
+  band2 <- try(terra::crop(terra::rast(band2_path), bbox_vect), silent = TRUE)
+  fmask <- try(terra::crop(terra::rast(fmask_path), bbox_vect), silent = TRUE)
+  if (inherits(band1, "try-error") || inherits(band2, "try-error") || inherits(fmask, "try-error")) {
+    return(NULL)
+  }
+
+  ndti <- (band1 - band2) / (band1 + band2)
+  ndti[is_fmask_bad_pixel(terra::values(fmask))] <- NA
+  names(ndti) <- "NDTI"
+
+  summarize_poly <- function(values, cov_fracs) {
+    pos     <- cov_fracs > 0
+    ok      <- !is.na(values) & pos
+    w_total <- sum(cov_fracs[pos])
+    w_valid <- sum(cov_fracs[ok])
+    if (!any(ok)) {
+      return(as.data.frame(list(ndti_mean = NA_real_, ndti_sd = NA_real_,
+                                n_valid = 0L, w_valid = 0, sum_w2 = 0, na_frac = NA_real_)))
+    }
+    v  <- values[ok]
+    w  <- cov_fracs[ok]
+    mu <- sum(w * v) / w_valid
+    as.data.frame(list(
+      ndti_mean = mu,
+      ndti_sd   = sqrt(sum(w * (v - mu)^2) / w_valid),
+      n_valid   = sum(ok),
+      w_valid   = w_valid,
+      sum_w2    = sum(w^2),   # n_eff = w_valid^2 / sum_w2  ->  SE = ndti_sd / sqrt(n_eff)
+      na_frac   = if (w_total > 0) 1 - w_valid / w_total else NA_real_
+    ))
+  }
+
+  result <- try(
+    exactextractr::exact_extract(ndti, parcels_sf, fun = summarize_poly,
+                                 progress = FALSE),
+    silent = TRUE
+  )
+  if (inherits(result, "try-error") || is.null(result)) return(NULL)
+
+  dt <- as.data.table(result)
+  # Assign parcel_id manually - exactextractr preserves row order matching parcels_sf.
+  dt[, parcel_id := parcels_sf$parcel_id]
+  dt[, date := scene_date]
+  dt
+}
+
+#### Product object
+product_ndti <- function() {
+  list(
+    prep_static          = ndti_prep_static_tilewise,
+    scene_index          = build_scene_index,
+    scene_index_tile_col = "tile_id",
+
+    # Load parcel geometry from GPKG for just this tile's parcels, reprojected
+    # to the raster CRS. Called once per tile before the scene loop.
+    prepare_tile = function(prep, tile_id, parcel_ids, scenes_this_tile) {
+      parcels_sf <- load_parcel_geometries(as.character(parcel_ids))
+      if (nrow(parcels_sf) == 0) return(NULL)
+      # Some scene files can be corrupt/truncated; choose the first readable
+      # scene to determine CRS so one bad file doesn't fail the whole tile.
+      raster_crs <- NULL
+      for (p in scenes_this_tile$path) {
+        r_try <- try(terra::rast(p), silent = TRUE)
+        if (inherits(r_try, "try-error")) next
+        c_try <- try(sf::st_crs(terra::crs(r_try)), silent = TRUE)
+        if (inherits(c_try, "try-error") || is.null(c_try) || is.na(c_try)) next
+        raster_crs <- c_try
+        break
+      }
+      if (is.null(raster_crs)) return(NULL)
+      sf::st_transform(parcels_sf, raster_crs)
+    },
+
+    # Extract NDTI stats for one HLS scene.
+    # tile_parcels is the sf object returned by prepare_tile (already reprojected).
+    process_scene = function(prep, scene_row, tile_parcels, tile_id) {
+      sensor <- scene_row$sensor[1]
+      b1     <- scene_row$path[1]
+      bands  <- list(HLSL = c("B06", "B07"), HLSS = c("B11", "B12"))[[sensor]]
+      b2     <- sub(bands[1], bands[2], b1, fixed = TRUE)
+      yr_str <- stringr::str_extract(basename(b1), "\\d{4}")
+      fmask  <- file.path(
+        prep$base_dirs[[sensor]], paste0(yr_str, "_Fmask"),
+        sub(prep$fmask_suffix[[sensor]], "Fmask.tif", basename(b1))
+      )
+      if (!file.exists(b2) || !file.exists(fmask)) return(NULL)
+      extract_ndti_scene(b1, b2, fmask, tile_parcels, tile_parcels$parcel_id,
+                         scene_row$date[1])
+    },
+
+    # Reads all tilepieces, aggregates across tiles, writes Parquet.
+    combine = function(prep, time_key, overwrite = FALSE, verbose = TRUE) {
+      ndti_combine(prep, time_key, overwrite = overwrite, verbose = verbose)
+    },
+
+    path_tilepieces    = path_tilepieces,
+    path_final_output  = path_monthly_output,
+    path_combine_parts = function(out_dir, year, time_key) NULL,
+
+    validate_tilepiece = function(dt) {
+      all(c("parcel_id", "date", "ndti_mean", "ndti_sd",
+            "n_valid", "w_valid", "sum_w2", "na_frac") %in% names(dt))
+    },
+
+    empty_tilepiece_schema = function() {
+      data.table(
+        parcel_id = character(),
+        date      = as.Date(integer(0), origin = "1970-01-01"),
+        ndti_mean = double(), ndti_sd = double(),
+        n_valid   = integer(), w_valid = double(),
+        sum_w2    = double(), na_frac = double()
+      )
+    },
+    empty_part_schema = function(year) {
+      data.table(
+        parcel_id = character(), year = integer(),
+        date      = as.Date(integer(0), origin = "1970-01-01"),
+        ndti_mean = double(), ndti_sd = double(),
+        n_valid   = integer(), w_valid = double(),
+        sum_w2    = double(), na_frac = double()
+      )
+    }
+  )
+}
diff --git a/modules/data.remote/inst/ccmmf/phenology/match_landiq_mslsp.R b/modules/data.remote/inst/ccmmf/phenology/match_landiq_mslsp.R
new file mode 100644
index 00000000000..dab7d2047da
--- /dev/null
+++ b/modules/data.remote/inst/ccmmf/phenology/match_landiq_mslsp.R
@@ -0,0 +1,605 @@
+#!/usr/bin/env Rscript
+# Match LandIQ crop seasons to MSLSP cycles using rule-based assignment (no rank cost).
+# Primary rule: ADOY inside [OGI, OGMn]; tie-break by nearest Peak, then cycle 1 before 2.
+# CLASS-aware season order: season 2 first when CLASS present; MULTIUSE D/M favors season 1;
+# then seasons 3/4. MSLSP cycles follow User Guide (cycle 1 = largest EVI2 amplitude).
+#
+# Main inputs: CCMMF_MANAGEMENT, CCMMF_LANDIQ_V4, raw_mslsp_v4.1 parquet, crops_all_years.parq.
+# Main outputs: assigned_year=Y.parquet and QC tables under phenology/matched_landiq_mslsp_v4.1.
+# How to run: Rscript match_landiq_mslsp.R (year from YEAR global or wrapper); see repo docs.
+# Workflow: monitoring workflow ASSIGN stage between MSLSP annual parquet and trait lookups.
+
+suppressPackageStartupMessages({
+  library(data.table)
+  library(arrow)
+  library(dplyr)
+  library(lubridate)
+})
+
+#### Configuration
+path_management  <- Sys.getenv("CCMMF_MANAGEMENT", "/projectnb/dietzelab/ccmmf/management")
+path_landiq_v4    <- Sys.getenv("CCMMF_LANDIQ_V4", "/projectnb/dietzelab/ccmmf/LandIQ-harmonized-v4.1")
+combined_root     <- file.path(path_management, "phenology/raw_mslsp_v4.1")
+landiq_parq       <- file.path(path_landiq_v4, "crops_all_years.parq")
+cropcode_csv      <- file.path(path_management, "LandIQ_cropCode_lookup_table.csv")
+out_dir           <- file.path(path_management, "phenology/matched_landiq_mslsp_v4.1")
+
+eps_eviamp             <- 0.01
+heterogeneity_na_frac_thr <- 0.5
+assign_active_only     <- TRUE
+
+year <- if (exists("YEAR", envir = .GlobalEnv)) get("YEAR", .GlobalEnv) else NULL
+do_subset_test <- if (exists("DO_SUBSET_TEST", envir = .GlobalEnv)) get("DO_SUBSET_TEST", .GlobalEnv) else FALSE
+sample_per_pft <- if (exists("SAMPLE_PER_PFT", envir = .GlobalEnv)) get("SAMPLE_PER_PFT", .GlobalEnv) else 500L
+assign_parcel_ids_file <- if (exists("ASSIGN_PARCEL_IDS_FILE", envir = .GlobalEnv)) get("ASSIGN_PARCEL_IDS_FILE", .GlobalEnv) else Sys.getenv("ASSIGN_PARCEL_IDS_FILE", "")
+assign_subset_ids <- if (nzchar(trimws(assign_parcel_ids_file)) && file.exists(assign_parcel_ids_file)) {
+  ids <- unique(trimws(as.character(fread(assign_parcel_ids_file)$parcel_id)))
+  ids[nzchar(ids)]
+} else character(0)
+
+#### Helpers
+# Normalize DOY for same-year wrap (e.g. OGI=350, OGMn=50). Do NOT use for cross-year DOY.
+norm_doy <- function(doy) {
+  d <- as.numeric(doy)
+  out <- d
+  ok <- !is.na(d)
+  out[ok & d < 1] <- d[ok & d < 1] + 365
+  out[ok & d > 365] <- d[ok & d > 365] - 365
+  out
+}
+
+# LandIQ: "negative ADOY = prior year (-92 = Oct 1); positive = target year" (metadata).
+# MSLSP: DOY "January 1 of target year = 1", valid -181 to 548 (User Guide). Use date comparison.
+adoy_in_window <- function(adoy, ogi, ogmn, year) {
+  yr <- as.integer(year)[1]
+  n <- max(length(adoy), length(ogi), length(ogmn))
+  adoy <- rep(as.numeric(adoy), length.out = n)
+  ogi  <- rep(as.numeric(ogi), length.out = n)
+  ogmn <- rep(as.numeric(ogmn), length.out = n)
+  out <- rep(NA, n)
+  ok <- !is.na(adoy) & !is.na(ogi) & !is.na(ogmn)
+  if (!any(ok)) return(out)
+  d0 <- as.Date(sprintf("%d-01-01", yr))
+  adoy_date <- d0 + as.integer(round(adoy[ok])) - 1L
+  ogi_date  <- d0 + as.integer(round(ogi[ok])) - 1L
+  ogmn_date <- d0 + as.integer(round(ogmn[ok])) - 1L
+  lo <- pmin(ogi_date, ogmn_date)
+  hi <- pmax(ogi_date, ogmn_date)
+  out[ok] <- (adoy_date >= lo) & (adoy_date <= hi)
+  out
+}
+
+is_real_cycle <- function(Peak, OGI, OGMn, w_valid, EVIamp, eps_amp = 0.01) {
+  ok_dates <- !is.na(Peak) & !is.na(OGI) & !is.na(OGMn)
+  ok_w <- !is.na(w_valid) & w_valid > 0
+  ok_evi <- !is.na(EVIamp) & EVIamp > eps_amp
+  ok_dates & ok_w & ok_evi
+}
+
+qc_mslsp_qa_pixel_agreement_flag <- function(gup_frac, gdown_frac, thr_good = 0.6, thr_bad = 0.3) {
+  gf <- suppressWarnings(as.numeric(gup_frac))
+  df <- suppressWarnings(as.numeric(gdown_frac))
+  m <- pmax(gf, df, na.rm = TRUE)
+  out <- rep("mixed_qa_pixel_agreement", length(m))
+  out[is.na(gf) & is.na(df)] <- "qa_not_available"
+  out[!is.na(m) & m >= thr_good] <- "high_qa_pixel_agreement"
+  out[!is.na(m) & m <= thr_bad] <- "low_qa_pixel_agreement"
+  out
+}
+
+heterogeneity_flag <- function(n_valid, w_valid, na_frac, na_frac_thr = 0.5) {
+  out <- rep("low_na_frac", length(n_valid))
+  out[is.na(n_valid) | is.na(w_valid)] <- "mslsp_metrics_missing"
+  out[!is.na(na_frac) & na_frac >= na_frac_thr] <- "high_na_frac"
+  out
+}
+
+qc_cycle_season_counts_label <- function(n_ms, n_li) {
+  if (is.na(n_ms) || n_ms == 0) return("no_mslsp_data")
+  if (is.na(n_li) || n_li == 0) return("no_landiq_active")
+  paste0(n_ms, if (n_ms == 1L) "cycle" else "cycles", "_", n_li, if (n_li == 1L) "season" else "seasons")
+}
+
+qc_mslsp_pixel_availability_flag <- function(w_valid, n_valid) {
+  has <- !is.na(w_valid) & w_valid > 0 & !is.na(n_valid) & n_valid > 0
+  out <- rep("no_pixels_or_invalid", length(w_valid))
+  out[has] <- "has_pixels"
+  out[is.na(w_valid) & is.na(n_valid)] <- "no_mslsp_data_for_cycle"
+  out
+}
+
+date_dist_days <- function(yr, doy1, doy2) {
+  n <- max(length(yr), length(doy1), length(doy2))
+  yr <- rep(as.integer(yr), length.out = n)
+  doy1 <- rep(as.numeric(doy1), length.out = n)
+  doy2 <- rep(as.numeric(doy2), length.out = n)
+  ok <- !is.na(yr) & !is.na(doy1) & !is.na(doy2)
+  out <- rep(NA_real_, n)
+  if (!any(ok)) return(out)
+  d0 <- as.Date(sprintf("%d-01-01", yr[ok]))
+  d1 <- d0 + as.integer(round(doy1[ok])) - 1L
+  d2 <- d0 + as.integer(round(doy2[ok])) - 1L
+  out[ok] <- as.numeric(d1 - d2)
+  out
+}
+
+# Convert DOY to Date (handles cross-year: doy < 1 = prior year, doy > 366 = next year)
+doy_to_date <- function(year, doy) {
+  yr <- as.integer(year)[1]
+  d <- suppressWarnings(as.numeric(doy)[1])
+  if (is.na(yr) || is.na(d)) return(as.Date(NA))
+  lubridate::ymd(paste0(yr, "-01-01")) + lubridate::days(round(d) - 1L)
+}
+
+load_combined_mslsp <- function(year, root = combined_root) {
+  year <- as.integer(year)
+  stopifnot("year must be a valid integer" = !is.na(year))
+  path <- file.path(root, paste0("year=", year), sprintf("mslsp_year=%d.parquet", year))
+  if (!file.exists(path)) stop("Combined Parquet not found: ", path)
+  d <- as.data.table(arrow::read_parquet(path))
+  stopifnot("Combined file is empty" = nrow(d) > 0L)
+  stopifnot("Combined data must have parcel_id" = "parcel_id" %in% names(d))
+  stopifnot("Combined data must have cycle column" = "cycle" %in% names(d))
+  d[, parcel_id := as.character(parcel_id)]
+  d[, year := as.integer(year)]
+  setorder(d, parcel_id, year, cycle)
+  d
+}
+
+col1 <- function(row, name) {
+  v <- row[[name]]
+  if (!is.null(v)) return(v[1])
+  v <- row[[paste0(name, ".x")]]
+  if (!is.null(v)) return(v[1])
+  NA_real_
+}
+opt <- function(row, name) { v <- row[[name]]; if (is.null(v)) NA_real_ else v[1] }
+
+mslsp_long_to_cycles <- function(long_rows) {
+  if (nrow(long_rows) == 0) return(list(mslsp_cycles = NULL, keep_reason = "no_mslsp_pixels"))
+  pid <- long_rows$parcel_id[1]
+  yr <- long_rows$year[1]
+  fill <- function(r, prefix) {
+    m <- r[[paste0(prefix, "_mean")]]
+    if (is.null(m)) NA_real_ else m
+  }
+  mslsp_cycle_list <- vector("list", nrow(long_rows))
+  for (j in seq_len(nrow(long_rows))) {
+    r <- long_rows[j]
+    cyc <- as.integer(r$cycle[1])
+    if (is.na(cyc)) cyc <- j
+    mslsp_cycle_list[[j]] <- data.table(
+      parcel_id = pid, year = yr, mslsp_cycle = cyc, mslsp_rank = j,
+      mslsp_OGI = fill(r, "OGI"), mslsp_50PCGI = fill(r, "50PCGI"), mslsp_OGMx = fill(r, "OGMx"), mslsp_Peak = fill(r, "Peak"),
+      mslsp_OGD = fill(r, "OGD"), mslsp_50PCGD = fill(r, "50PCGD"), mslsp_OGMn = fill(r, "OGMn"),
+      mslsp_EVImax = fill(r, "EVImax"), mslsp_EVIamp = fill(r, "EVIamp"), mslsp_EVIarea = fill(r, "EVIarea"),
+      mslsp_w_valid = col1(r, "w_valid"), mslsp_n_valid = as.integer(col1(r, "n_valid")), mslsp_na_frac = col1(r, "na_frac"),
+      mslsp_numObs = opt(r, "numObs_mean"), mslsp_NumCycles = opt(r, "NumCycles_mode"),
+      mslsp_gupQA_mode = opt(r, "gupQA_mode"), mslsp_gupQA_mode_frac = opt(r, "gupQA_mode_frac"),
+      mslsp_gdownQA_mode = opt(r, "gdownQA_mode"), mslsp_gdownQA_mode_frac = opt(r, "gdownQA_mode_frac")
+    )
+  }
+  mslsp_cycles <- rbindlist(mslsp_cycle_list)
+  mslsp_cycles[, cycle_real := is_real_cycle(mslsp_Peak, mslsp_OGI, mslsp_OGMn, mslsp_w_valid, mslsp_EVIamp, eps_amp = eps_eviamp)]
+  real <- mslsp_cycles[cycle_real == TRUE]
+  if (nrow(real) == 0) return(list(mslsp_cycles = NULL, keep_reason = "mslsp_cycles_filtered_out"))
+  real[, mslsp_rank := seq_len(.N)]
+  list(mslsp_cycles = real, keep_reason = if (nrow(real) >= 2) "both_mslsp_cycles_kept" else "single_mslsp_cycle_only")
+}
+
+is_valid_landiq_class <- function(x) {
+  x <- trimws(as.character(x))
+  !is.na(x) & nzchar(x) & x != "**"
+}
+
+season_component_priority <- function(season, class_code, multiuse = NA_character_) {
+  s <- suppressWarnings(as.integer(season))
+  class_ok <- is_valid_landiq_class(class_code)
+  mu <- trimws(as.character(multiuse))
+  is_distinct_or_mixed <- !is.na(mu) & mu %in% c("D", "M", "d", "m")
+
+  out <- rep(99L, length(s))
+  out[class_ok & s == 2L] <- 1L
+  out[class_ok & s == 1L & is_distinct_or_mixed] <- 2L
+  out[class_ok & s == 1L & !is_distinct_or_mixed] <- 3L
+  out[class_ok & s == 3L] <- 4L
+  out[class_ok & s == 4L] <- 5L
+  out[!class_ok] <- 20L + fifelse(is.na(s), 9L, s)
+  out
+}
+
+assignment_class_rollup <- function(DT) {
+  first_nonempty <- function(x) {
+    y <- as.character(x)
+    y <- y[!is.na(y) & nzchar(trimws(y))]
+    if (length(y) == 0) NA_character_ else y[1]
+  }
+  if (any(DT$qc_cycle_season_counts == "no_mslsp_pixels", na.rm = TRUE)) return("no_mslsp_pixels")
+  if (any(DT$qc_cycle_season_counts == "mslsp_cycles_filtered_out", na.rm = TRUE)) return("mslsp_cycles_filtered_out")
+  if (any(DT$qc_cycle_season_counts == "no_landiq_active", na.rm = TRUE)) return("no_landiq_active")
+  if (any(DT$qc_cycle_season_counts == "no_mslsp_cycle_for_season", na.rm = TRUE)) return("no_mslsp_cycle_assigned")
+  if (any(DT$qc_adoy_vs_cycle == "adoy_outside_cycle", na.rm = TRUE)) return("adoy_outside_cycle_review")
+  if (any(DT$qc_n_adoy_in_cycle == "multiple_adoy_in_cycle", na.rm = TRUE)) return("multiple_adoy_in_cycle_review")
+  cc <- first_nonempty(DT$qc_cycle_season_counts)
+  if (!is.na(cc) && !(cc %in% c("1cycle_1season", "2cycles_2seasons"))) {
+    if (cc == "2cycles_1season") return("mismatch_2cycles_1season")
+    if (cc == "1cycle_2seasons") return("mismatch_1cycle_2seasons")
+    return(paste0("mismatch_", cc))
+  }
+  if (is.na(cc)) return("mismatch_unclassified")
+  if (any(DT$qc_adoy_vs_cycle == "adoy_inside_cycle", na.rm = TRUE)) return("adoy_inside_and_single")
+  return("matched_no_adoy")
+}
+
+assign_one_4rows <- function(pid, yr, combined_row, landiq_rows) {
+  out <- data.table(
+    parcel_id = pid, year = yr, season = 1L:4L,
+    assigned_by = "no_match", assigned_woody_tiebreak = FALSE,
+    mslsp_cycle = NA_integer_,
+    landiq_PCNT = NA_real_, landiq_ADOY = NA_real_, landiq_PFT = NA_character_, landiq_CLASS = NA_character_, landiq_SUBCLASS = NA_character_, landiq_MULTIUSE = NA_character_,
+    mslsp_Peak = as.Date(NA), mslsp_OGI = as.Date(NA), mslsp_OGMn = as.Date(NA),
+    mslsp_50PCGI = as.Date(NA), mslsp_OGMx = as.Date(NA), mslsp_OGD = as.Date(NA), mslsp_50PCGD = as.Date(NA),
+    mslsp_EVImax = NA_real_, mslsp_EVIamp = NA_real_, mslsp_EVIarea = NA_real_,
+    mslsp_w_valid = NA_real_, mslsp_n_valid = NA_integer_, mslsp_na_frac = NA_real_, mslsp_numObs = NA_real_, mslsp_NumCycles = NA_real_,
+    peak_dist_days = NA_real_,
+    mslsp_gupQA_mode = NA_real_, mslsp_gupQA_mode_frac = NA_real_, mslsp_gdownQA_mode = NA_real_, mslsp_gdownQA_mode_frac = NA_real_,
+    qc_landiq_season_data = NA_character_,
+    qc_adoy_vs_cycle = NA_character_, qc_n_adoy_in_cycle = NA_character_,
+    qc_adoy_status = NA_character_, qc_cycle_status = NA_character_,
+    qc_adoy_cycle_relation = NA_character_, qc_adoy_multiplicity = NA_character_,
+    qc_mslsp_pixel_availability = NA_character_, qc_heterogeneity = NA_character_,
+    qc_cycle_season_counts = NA_character_, qc_mslsp_qa_pixel_agreement = NA_character_,
+    match_outcome = NA_character_, qc_mslsp_cycles_available = NA_character_
+  )
+
+  for (s in 1:4) {
+    r <- landiq_rows[season == s]
+    if (nrow(r)) {
+      out[season == s, `:=`(
+        landiq_PCNT = r$PCNT[1], landiq_ADOY = r$ADOY[1], landiq_PFT = r$PFT[1],
+        landiq_CLASS = if ("CLASS" %in% names(r)) r$CLASS[1] else NA_character_,
+        landiq_SUBCLASS = if ("SUBCLASS" %in% names(r)) r$SUBCLASS[1] else NA_character_,
+        landiq_MULTIUSE = if ("MULTIUSE" %in% names(r)) r$MULTIUSE[1] else NA_character_
+      )]
+      pcnt <- r$PCNT[1]
+      out[season == s, qc_landiq_season_data := fifelse(!is.na(pcnt) & pcnt >= 0, "landiq_season_has_data", NA_character_)]
+    } else {
+      out[season == s, qc_landiq_season_data := NA_character_]
+    }
+  }
+
+  active <- landiq_rows[!is.na(PCNT) & PCNT >= 0]
+  n_landiq_active <- nrow(active)
+  if (n_landiq_active == 0) {
+    out[, `:=`(
+      qc_adoy_vs_cycle = NA_character_, qc_n_adoy_in_cycle = NA_character_,
+      qc_mslsp_pixel_availability = NA_character_, qc_heterogeneity = NA_character_,
+      qc_mslsp_qa_pixel_agreement = NA_character_,
+      qc_cycle_season_counts = "no_landiq_active",
+      qc_mslsp_cycles_available = NA_character_
+    )]
+    out[, match_outcome := "no_landiq_active"]
+    return(list(assigned = out))
+  }
+
+  cyc <- mslsp_long_to_cycles(combined_row)
+  mslsp_cycles <- cyc$mslsp_cycles
+  if (is.null(mslsp_cycles) || nrow(mslsp_cycles) == 0) {
+    out[, `:=`(
+      qc_adoy_vs_cycle = NA_character_, qc_n_adoy_in_cycle = NA_character_,
+      qc_mslsp_pixel_availability = NA_character_, qc_heterogeneity = NA_character_,
+      qc_mslsp_qa_pixel_agreement = NA_character_,
+      qc_cycle_season_counts = cyc$keep_reason,
+      qc_mslsp_cycles_available = cyc$keep_reason
+    )]
+    out[, match_outcome := cyc$keep_reason]
+    return(list(assigned = out))
+  }
+
+  n_mslsp_used <- nrow(mslsp_cycles)
+  out[, qc_cycle_season_counts := qc_cycle_season_counts_label(n_mslsp_used, n_landiq_active)]
+
+  landiq_active <- active[, .(
+    landiq_season = season,
+    landiq_ADOY = ADOY,
+    landiq_PFT = if ("PFT" %in% names(active)) PFT else NA_character_,
+    landiq_CLASS = if ("CLASS" %in% names(active)) CLASS else NA_character_,
+    landiq_SUBCLASS = if ("SUBCLASS" %in% names(active)) SUBCLASS else NA_character_,
+    landiq_MULTIUSE = if ("MULTIUSE" %in% names(active)) MULTIUSE else NA_character_
+  )]
+  landiq_active[, landiq_PCNT := active$PCNT]
+  landiq_active[, component_priority := season_component_priority(landiq_season, landiq_CLASS, landiq_MULTIUSE)]
+
+  mslsp_cycles[, keytmp := 1L]
+  landiq_active[, keytmp := 1L]
+  combos <- merge(mslsp_cycles, landiq_active, by = "keytmp", allow.cartesian = TRUE)
+  combos[, keytmp := NULL]
+  combos[, has_adoy := !is.na(landiq_ADOY) & !is.na(mslsp_OGI) & !is.na(mslsp_OGMn)]
+  combos[, adoy_in_window := fifelse(has_adoy, adoy_in_window(landiq_ADOY, mslsp_OGI, mslsp_OGMn, yr), NA)]
+  combos[, peak_dist_abs := fifelse(has_adoy, abs(as.numeric(date_dist_days(yr, mslsp_Peak, landiq_ADOY))), Inf)]
+  combos[is.na(peak_dist_abs), peak_dist_abs := Inf]
+  combos[, is_woody := identical(trimws(as.character(landiq_PFT)), "woody")]
+
+  season_order <- landiq_active[order(component_priority, landiq_season), unique(landiq_season)]
+  used_cycles <- integer()
+  chosen_rows <- vector("list", length(season_order))
+  n_chosen <- 0L
+  for (sea in season_order) {
+    cand <- combos[landiq_season == sea & !(mslsp_cycle %in% used_cycles)]
+    if (nrow(cand) == 0) next
+
+    in_window <- cand[adoy_in_window == TRUE]
+    if (nrow(in_window) > 0) {
+      # Nearest Peak to ADOY, then prefer cycle 1 (MSLSP strongest cycle)
+      setorder(in_window, peak_dist_abs, mslsp_cycle)
+      pick <- in_window[1]
+      pick[, assigned_woody_tiebreak := FALSE]
+    } else {
+      # No ADOY: tie-break by season priority and mslsp_cycle (woody and non-woody)
+      woody <- cand$is_woody[1L]
+      setorder(cand, peak_dist_abs, mslsp_cycle)
+      pick <- cand[1]
+      pick[, assigned_woody_tiebreak := woody]
+    }
+
+    n_chosen <- n_chosen + 1L
+    chosen_rows[[n_chosen]] <- pick
+    used_cycles <- c(used_cycles, as.integer(pick$mslsp_cycle[1]))
+  }
+
+  chosen <- if (n_chosen > 0) rbindlist(chosen_rows[seq_len(n_chosen)], use.names = TRUE, fill = TRUE) else combos[0]
+
+  landiq_adoy_vec <- active$ADOY
+  n_in_cycle <- vapply(seq_len(nrow(chosen)), function(i) {
+    sum(adoy_in_window(landiq_adoy_vec, chosen$mslsp_OGI[i], chosen$mslsp_OGMn[i], yr), na.rm = TRUE)
+  }, integer(1))
+  chosen[, n_adoy_in_cycle := n_in_cycle]
+  chosen[has_adoy == TRUE & adoy_in_window == TRUE, n_adoy_in_cycle := pmax(n_adoy_in_cycle, 1L)]
+  chosen[, qc_n_adoy_in_cycle := fifelse(!has_adoy, "no_adoy_recorded",
+    fifelse(is.na(n_adoy_in_cycle) | n_adoy_in_cycle == 0L, "no_adoy_in_cycle",
+      fifelse(n_adoy_in_cycle == 1L, "one_adoy_in_cycle", "multiple_adoy_in_cycle")))]
+  chosen[, assigned_woody_tiebreak := if ("assigned_woody_tiebreak" %in% names(chosen)) assigned_woody_tiebreak else FALSE]
+  chosen[, qc_adoy_vs_cycle := fcase(
+    has_adoy & adoy_in_window == TRUE, "adoy_inside_cycle",
+    has_adoy & (is.na(adoy_in_window) | !adoy_in_window), "adoy_outside_cycle",
+    !has_adoy & assigned_woody_tiebreak == TRUE, "no_adoy_woody_tiebreak",
+    default = "no_adoy_recorded"
+  )]
+  chosen[, qc_mslsp_pixel_availability := qc_mslsp_pixel_availability_flag(mslsp_w_valid, mslsp_n_valid)]
+  chosen[, qc_heterogeneity := heterogeneity_flag(mslsp_n_valid, mslsp_w_valid, mslsp_na_frac, na_frac_thr = heterogeneity_na_frac_thr)]
+  chosen[, qc_mslsp_qa_pixel_agreement := qc_mslsp_qa_pixel_agreement_flag(mslsp_gupQA_mode_frac, mslsp_gdownQA_mode_frac)]
+  chosen[, qc_cycle_season_counts := qc_cycle_season_counts_label(n_mslsp_used, n_landiq_active)]
+  chosen[, qc_mslsp_cycles_available := cyc$keep_reason]
+
+  for (k in seq_len(nrow(chosen))) {
+    sea <- chosen$landiq_season[k]
+    rr <- which(out$season == sea)
+    if (length(rr) != 1) next
+    out[rr, `:=`(
+      assigned_by = "matched",
+      assigned_woody_tiebreak = chosen$assigned_woody_tiebreak[k],
+      mslsp_cycle = chosen$mslsp_cycle[k],
+      mslsp_OGI = doy_to_date(yr, chosen$mslsp_OGI[k]), mslsp_50PCGI = doy_to_date(yr, chosen$mslsp_50PCGI[k]), mslsp_OGMx = doy_to_date(yr, chosen$mslsp_OGMx[k]),
+      mslsp_Peak = doy_to_date(yr, chosen$mslsp_Peak[k]), mslsp_OGD = doy_to_date(yr, chosen$mslsp_OGD[k]), mslsp_50PCGD = doy_to_date(yr, chosen$mslsp_50PCGD[k]), mslsp_OGMn = doy_to_date(yr, chosen$mslsp_OGMn[k]),
+      mslsp_EVImax = chosen$mslsp_EVImax[k], mslsp_EVIamp = chosen$mslsp_EVIamp[k], mslsp_EVIarea = chosen$mslsp_EVIarea[k],
+      mslsp_w_valid = chosen$mslsp_w_valid[k], mslsp_n_valid = chosen$mslsp_n_valid[k], mslsp_na_frac = chosen$mslsp_na_frac[k],
+      mslsp_numObs = chosen$mslsp_numObs[k],
+      peak_dist_days = chosen$peak_dist_abs[k],
+      mslsp_gupQA_mode = chosen$mslsp_gupQA_mode[k], mslsp_gupQA_mode_frac = chosen$mslsp_gupQA_mode_frac[k],
+      mslsp_gdownQA_mode = chosen$mslsp_gdownQA_mode[k], mslsp_gdownQA_mode_frac = chosen$mslsp_gdownQA_mode_frac[k],
+      qc_adoy_vs_cycle = chosen$qc_adoy_vs_cycle[k],
+      qc_n_adoy_in_cycle = chosen$qc_n_adoy_in_cycle[k],
+      qc_mslsp_pixel_availability = qc_mslsp_pixel_availability_flag(chosen$mslsp_w_valid[k], chosen$mslsp_n_valid[k]),
+      qc_heterogeneity = heterogeneity_flag(chosen$mslsp_n_valid[k], chosen$mslsp_w_valid[k], chosen$mslsp_na_frac[k], na_frac_thr = heterogeneity_na_frac_thr),
+      qc_mslsp_qa_pixel_agreement = qc_mslsp_qa_pixel_agreement_flag(chosen$mslsp_gupQA_mode_frac[k], chosen$mslsp_gdownQA_mode_frac[k])
+    )]
+  }
+
+  out[assigned_by == "no_match", `:=`(
+    qc_adoy_vs_cycle = NA_character_, qc_n_adoy_in_cycle = NA_character_,
+    qc_mslsp_pixel_availability = NA_character_, qc_heterogeneity = NA_character_,
+    qc_mslsp_qa_pixel_agreement = NA_character_,
+    qc_cycle_season_counts = NA_character_, qc_mslsp_cycles_available = NA_character_
+  )]
+  out[assigned_by == "no_match" & qc_landiq_season_data == "landiq_season_has_data", `:=`(
+    qc_adoy_vs_cycle = "no_cycle_assigned",
+    qc_n_adoy_in_cycle = "no_cycle_assigned",
+    qc_mslsp_pixel_availability = "no_mslsp_data_for_cycle",
+    qc_heterogeneity = "no_cycle_assigned",
+    qc_mslsp_qa_pixel_agreement = "no_cycle_assigned",
+    qc_cycle_season_counts = "no_mslsp_cycle_for_season",
+    qc_mslsp_cycles_available = "no_cycle_assigned"
+  )]
+
+  out[, qc_adoy_status := fifelse(
+    qc_landiq_season_data == "landiq_season_has_data",
+    fifelse(is.na(landiq_ADOY), "no_landiq_adoy", "has_adoy"),
+    NA_character_
+  )]
+  out[, qc_cycle_status := fifelse(
+    qc_landiq_season_data == "landiq_season_has_data",
+    fifelse(assigned_by == "matched", "has_assigned_cycle", "no_assigned_cycle"),
+    NA_character_
+  )]
+  out[, qc_adoy_cycle_relation := fifelse(
+    qc_adoy_status == "has_adoy" & qc_cycle_status == "has_assigned_cycle",
+    qc_adoy_vs_cycle,
+    NA_character_
+  )]
+  out[, qc_adoy_multiplicity := fifelse(
+    qc_adoy_status == "has_adoy" & qc_cycle_status == "has_assigned_cycle",
+    qc_n_adoy_in_cycle,
+    NA_character_
+  )]
+
+  if ("NumCycles_mode" %in% names(combined_row)) out[, mslsp_NumCycles := combined_row$NumCycles_mode[1]]
+  out[assigned_by == "matched", qc_mslsp_cycles_available := cyc$keep_reason]
+
+  list(assigned = out)
+}
+
+# Extract QC summary from assigned data (file path or data.table).
+# Returns long-format: year, level, qc_dimension, category, pft, n, pct.
+# qc_mslsp_cycles_available only at field_year (avoids redundant row-level copy).
+# If out_dir is set, writes qc_summary_year=Y.csv.
+extract_qc_summary <- function(assigned_path_or_dt, out_dir = NULL) {
+  if (is.character(assigned_path_or_dt)) {
+    if (!file.exists(assigned_path_or_dt)) stop("File not found: ", assigned_path_or_dt)
+    assigned <- as.data.table(arrow::read_parquet(assigned_path_or_dt))
+    yr <- as.integer(assigned$year[1])
+  } else {
+    assigned <- as.data.table(assigned_path_or_dt)
+    yr <- as.integer(assigned$year[1])
+  }
+  pft_col <- names(assigned)[tolower(names(assigned)) %in% c("pft", "landiq_pft")][1]
+  has_pft <- !is.na(pft_col)
+
+  matched <- assigned[assigned_by == "matched"]
+  row_dims <- c(
+    "qc_adoy_vs_cycle", "qc_n_adoy_in_cycle", "qc_cycle_season_counts",
+    "qc_mslsp_pixel_availability", "qc_heterogeneity", "qc_mslsp_qa_pixel_agreement"
+  )
+  row_dims <- intersect(row_dims, names(matched))
+  out <- data.table(year = integer(), level = character(), qc_dimension = character(), category = character(), pft = character(), n = integer(), pct = double())
+  if (nrow(matched) > 0 && length(row_dims) > 0) {
+    for (d in row_dims) {
+      by_cols <- c(d, if (has_pft) pft_col else NULL)
+      tab <- matched[, .N, by = by_cols]
+      tab[, category := fifelse(is.na(get(d)), "(no value)", as.character(get(d)))]
+      tab[, pft := if (has_pft) fifelse(is.na(get(pft_col)), "(no value)", as.character(get(pft_col))) else "(all)"]
+      if (has_pft) tab[, (pft_col) := NULL]
+      tab[, (d) := NULL]
+      tab[, pct := round(100 * N / sum(N), 1), by = "pft"]
+      setnames(tab, "N", "n")
+      tab[, `:=`(year = yr, level = "row", qc_dimension = d)]
+      setcolorder(tab, c("year", "level", "qc_dimension", "category", "pft", "n", "pct"))
+      out <- rbind(out, tab)
+    }
+  }
+  fy_cols <- c("match_outcome", "qc_mslsp_cycles_available", if (has_pft) pft_col else NULL)
+  fy_rollup <- assigned[, lapply(.SD, function(x) x[!is.na(x)][1L]), by = .(parcel_id, year), .SDcols = fy_cols]
+  if (has_pft) setnames(fy_rollup, pft_col, "pft_rollup")
+  n_fy <- nrow(fy_rollup)
+  if (n_fy > 0) {
+    for (d in c("match_outcome", "qc_mslsp_cycles_available")) {
+      if (!d %in% names(fy_rollup)) next
+      by_fy <- c(d, if (has_pft) "pft_rollup" else NULL)
+      tab <- fy_rollup[, .N, by = by_fy]
+      tab[, category := fifelse(is.na(get(d)), "(no value)", as.character(get(d)))]
+      tab[, pft := if (has_pft) fifelse(is.na(pft_rollup), "(no value)", as.character(pft_rollup)) else "(all)"]
+      if (has_pft) tab[, pft_rollup := NULL]
+      tab[, (d) := NULL]
+      if (has_pft) tab[, pct := round(100 * N / sum(N), 1), by = "pft"] else tab[, pct := round(100 * N / n_fy, 1)]
+      setnames(tab, "N", "n")
+      tab[, `:=`(year = yr, level = "field_year", qc_dimension = d)]
+      setcolorder(tab, c("year", "level", "qc_dimension", "category", "pft", "n", "pct"))
+      out <- rbind(out, tab)
+    }
+  }
+  if (nrow(out) > 0) {
+    # Sort: level (row then field_year), qc_dimension (fixed order), pft (row/hay/rice/woody/noncrop then other), then n descending
+    pft_order <- c("row", "hay", "rice", "woody", "noncrop", "(no value)", "(all)")
+    dim_order <- c(
+      "qc_adoy_vs_cycle", "qc_n_adoy_in_cycle", "qc_cycle_season_counts",
+      "qc_mslsp_pixel_availability", "qc_heterogeneity", "qc_mslsp_qa_pixel_agreement",
+      "match_outcome", "qc_mslsp_cycles_available"
+    )
+    out[, level := factor(level, levels = c("row", "field_year"))]
+    out[, qc_dimension := factor(qc_dimension, levels = dim_order)]
+    out[, pft := factor(pft, levels = c(pft_order, setdiff(unique(out$pft), pft_order)))]
+    setorder(out, level, qc_dimension, pft, -n)
+    out[, level := as.character(level)]
+    out[, qc_dimension := as.character(qc_dimension)]
+    out[, pft := as.character(pft)]
+    if (length(out_dir) > 0 && nzchar(out_dir)) {
+      dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
+      out_file <- file.path(out_dir, paste0("qc_summary_year=", yr, ".csv"))
+      fwrite(out, out_file)
+      message("[QC] Summary: ", out_file)
+    }
+  }
+  invisible(out)
+}
+
+summarize_assignment <- function(year, out_path = out_dir) {
+  yr <- as.integer(year)
+  assigned_path <- file.path(out_path, paste0("assigned_year=", yr, ".parquet"))
+  if (!file.exists(assigned_path)) stop("assigned parquet not found: ", assigned_path)
+  extract_qc_summary(assigned_path, out_path)
+}
+
+run_assignment <- function(year, cr = combined_root,
+                           subset_test = do_subset_test, samp_per_pft = sample_per_pft,
+                           out_path = out_dir) {
+  yr <- as.integer(year)
+  stopifnot("year must be a valid integer" = !is.na(yr))
+  message("[1/9] Loading combined MSLSP year=", yr)
+  pheno <- load_combined_mslsp(yr, cr)
+  if (!"MULTIUSE" %in% names(pheno)) pheno[, MULTIUSE := NA_character_]
+  message("[2/9] Loading LandIQ year=", yr)
+  lookup   <- fread(cropcode_csv)
+  ag_pairs <- unique(lookup[is_agricultural == TRUE, .(CLASS = trimws(CLASS), SUBCLASS = as.character(SUBCLASS), PFT)])
+  ag_classes <- unique(ag_pairs$CLASS)
+  landiq <- as.data.table(
+    arrow::open_dataset(landiq_parq) |>
+      dplyr::filter(year == !!yr, CLASS %in% !!ag_classes) |>
+      dplyr::collect()
+  )
+  landiq[, CLASS := trimws(as.character(CLASS))]
+  landiq[, SUBCLASS := as.character(SUBCLASS)]
+  # Normalize "no subclass" so CLASS X (fallow) and other ** in lookup match: NA, "", "**" -> "**"
+  landiq[is.na(SUBCLASS) | trimws(SUBCLASS) == "" | trimws(SUBCLASS) == "**", SUBCLASS := "**"]
+  landiq[, parcel_id := trimws(as.character(parcel_id))]
+  if ("MULTIUSE" %in% names(landiq)) landiq[, MULTIUSE := trimws(as.character(MULTIUSE))]
+  landiq[, PCNT := suppressWarnings(as.numeric(PCNT))]
+  landiq[, ADOY := suppressWarnings(as.numeric(ADOY))]
+  landiq[ADOY == 0, ADOY := NA_real_]
+  landiq[, year := as.integer(year)]
+  landiq <- merge(landiq, ag_pairs, by = c("CLASS", "SUBCLASS"))
+  setkey(landiq, parcel_id, year)
+  setkey(pheno, parcel_id, year)
+  fys <- if (isTRUE(assign_active_only)) unique(landiq[!is.na(PCNT) & PCNT >= 0, .(parcel_id, year)]) else unique(landiq[, .(parcel_id, year)])
+  in_pheno <- unique(pheno[fys, nomatch = 0][, .(parcel_id, year)])
+  if (nrow(in_pheno) == 0) stop("No field-years in common between combined MSLSP and LandIQ.")
+  message("[3/9] Overlap: ", nrow(in_pheno), " field-years in both")
+  fys <- in_pheno
+  if (length(assign_subset_ids) > 0) {
+    fys[, parcel_id := as.character(parcel_id)]
+    fys <- fys[parcel_id %in% assign_subset_ids]
+    if (nrow(fys) == 0) {
+      message("[3/9] No subset parcel_ids in overlap for year ", yr, "; skipping this year.")
+      return(invisible(list(assigned = data.table())))
+    }
+    out_path <- file.path(out_path, "subsample_n400")
+  } else if (subset_test) {
+    n_take <- min(samp_per_pft, nrow(fys))
+    fys <- fys[sample(.N, n_take)]
+  }
+  setorder(fys, parcel_id, year)
+  message("[4/9] Assigning ", nrow(fys), " field-years")
+  pheno_split  <- split(pheno, pheno$parcel_id)
+  landiq_split <- split(landiq, landiq$parcel_id)
+  results <- lapply(fys$parcel_id, function(pid) {
+    cr <- pheno_split[[pid]]; if (is.null(cr)) cr <- pheno[0L]
+    lr <- landiq_split[[pid]]; if (is.null(lr)) lr <- landiq[0L]
+    assign_one_4rows(pid, yr, cr, lr)
+  })
+  assigned <- rbindlist(lapply(results, `[[`, "assigned"), fill = TRUE)
+  assigned[, match_outcome := {
+    ac <- assignment_class_rollup(.SD)
+    fcase(ac == "matched_no_adoy", "matched_no_adoy", ac == "adoy_inside_and_single", "matched_adoy_validated", default = ac)
+  }, by = .(parcel_id, year)]
+  assigned[assigned_by == "no_match" & (is.na(qc_landiq_season_data) | qc_landiq_season_data != "landiq_season_has_data"), match_outcome := NA_character_]
+  dir.create(out_path, recursive = TRUE, showWarnings = FALSE)
+  out_assigned <- file.path(out_path, paste0("assigned_year=", yr, ".parquet"))
+  arrow::write_parquet(assigned, out_assigned)
+  extract_qc_summary(assigned, out_path)
+  invisible(list(assigned = assigned))
+}
+
+if (!is.null(year)) {
+  run_assignment(year, subset_test = do_subset_test, samp_per_pft = sample_per_pft)
+}
+
+message("Loaded standalone matching script (rule-based + CLASS-aware).")
+
diff --git a/modules/data.remote/inst/ccmmf/tillage/tillage_metrics.R b/modules/data.remote/inst/ccmmf/tillage/tillage_metrics.R
new file mode 100644
index 00000000000..181ad9a617d
--- /dev/null
+++ b/modules/data.remote/inst/ccmmf/tillage/tillage_metrics.R
@@ -0,0 +1,178 @@
+# Tillage metrics from NDTI time series relative to phenology dates (OGI, OGMn).
+# For each fallow window between harvest and next planting, interpolates NDTI, smooths,
+# and derives tillage effect metrics (dplyr tibble in, tibble out).
+#
+# Main inputs: ndti_table with parcel_id, year, date, ndti_mean, ndti_sd, n_valid, PFT;
+#   phenology_table with parcel_id, year, OGI_date, OGMn_date (Date). Assign PFT before
+#   calling if NDTI lacks it.
+# Main outputs: one row per fallow window with tillage_eff and related columns.
+# How to run: source() and call tillage_metrics(); or sourced from make_events_statewide.R.
+# Workflow: monitoring workflow DERIVE stage before statewide tillage events.
+
+# Neighbor NDTI obs (n_valid > 0) before/after target_date; pooled SD when
+# target day has no obs. Vectorized over rows of q (parcel_id, target_date).
+.min_ndti_neighbor_details <- function(rel_dt, q_dt) {
+  n <- nrow(q_dt)
+  prev_date <- as.Date(rep(NA_real_, n))
+  prev_nv <- rep(NA_real_, n)
+  prev_ss <- rep(NA_real_, n)
+  foll_date <- as.Date(rep(NA_real_, n))
+  foll_nv <- rep(NA_real_, n)
+  foll_ss <- rep(NA_real_, n)
+
+  for (pid in unique(q_dt$parcel_id)) {
+    ix <- which(q_dt$parcel_id == pid)
+    rp <- rel_dt[rel_dt$parcel_id == pid, , drop = FALSE]
+    if (nrow(rp) == 0L) {
+      next
+    }
+    o <- order(rp$date)
+    rp <- rp[o, , drop = FALSE]
+    di <- as.integer(rp$date)
+    ti <- as.integer(q_dt$target_date[ix])
+
+    ip <- findInterval(ti - 1L, di)
+    hit <- ip >= 1L
+    if (any(hit)) {
+      ii <- ix[hit]
+      jj <- ip[hit]
+      prev_date[ii] <- rp$date[jj]
+      prev_nv[ii] <- rp$n_valid[jj]
+      prev_ss[ii] <- rp$ss[jj]
+    }
+
+    fi <- findInterval(ti, di) + 1L
+    ok <- fi <= length(di) & di[fi] > ti
+    if (any(ok, na.rm = TRUE)) {
+      ii <- ix[ok]
+      jj <- fi[ok]
+      foll_date[ii] <- rp$date[jj]
+      foll_nv[ii] <- rp$n_valid[jj]
+      foll_ss[ii] <- rp$ss[jj]
+    }
+  }
+
+  list(
+    prev_date = prev_date,
+    prev_nv = prev_nv,
+    prev_ss = prev_ss,
+    foll_date = foll_date,
+    foll_nv = foll_nv,
+    foll_ss = foll_ss
+  )
+}
+
+tillage_metrics <- function(ndti_table, phenology_table) {
+  ndti_work <- dplyr::as_tibble(ndti_table) |>
+    dplyr::mutate(
+      date = as.Date(date),
+      ss = ifelse(!is.na(n_valid) & n_valid > 0, n_valid * (ndti_sd^2), 0)
+    )
+
+  pheno_date <- dplyr::transmute(
+    phenology_table,
+    parcel_id,
+    year,
+    OGI_date,
+    OGMn_date
+  )
+
+  ndti_smooth <- ndti_work |>
+    dplyr::inner_join(pheno_date, by = c("parcel_id", "year")) |>
+    dplyr::arrange(parcel_id, date) |>
+    dplyr::group_by(parcel_id, year, PFT) |>
+    tidyr::complete(date = seq.Date(min(date), max(date), by = "day")) |>
+    tidyr::fill(OGMn_date, OGI_date, .direction = "downup") |>
+    dplyr::mutate(
+      mean_ndti_filled = zoo::na.approx(ndti_mean, x = date, na.rm = FALSE),
+      smoothed = as.numeric(stats::filter(mean_ndti_filled, rep(1 / 4, 4), sides = 2))
+    ) |>
+    dplyr::ungroup()
+
+  fallow_periods <- pheno_date |>
+    dplyr::arrange(parcel_id, OGI_date) |>
+    dplyr::group_by(parcel_id) |>
+    dplyr::mutate(fallow_start = OGMn_date, fallow_end = dplyr::lead(OGI_date)) |>
+    dplyr::filter(!is.na(fallow_end)) |>
+    dplyr::ungroup()
+
+  joined_fb <- dplyr::inner_join(
+    ndti_smooth,
+    fallow_periods,
+    by = "parcel_id",
+    suffix = c("_ndti", "_fallow")
+  )
+  yr_col <- if ("year_fallow" %in% names(joined_fb)) "year_fallow" else "year"
+
+  base_metrics <- joined_fb |>
+    dplyr::filter(date >= fallow_start, date <= fallow_end) |>
+    dplyr::group_by(parcel_id, fallow_start) |>
+    dplyr::summarize(
+      min_idx = which.min(smoothed),
+      minNDTI_date = date[min_idx],
+      ndti_on_minNDTI = smoothed[min_idx],
+      max_pre_idx = which.max(ifelse(date <= minNDTI_date, smoothed, -Inf)),
+      maxNDTI_pre_date = date[max_pre_idx],
+      maxNDTI_pre_min = smoothed[max_pre_idx],
+      ndti_pct_change = ((maxNDTI_pre_min - ndti_on_minNDTI) / maxNDTI_pre_min) * 100,
+      year = dplyr::first(.data[[yr_col]]),
+      PFT = dplyr::first(PFT),
+      OGMn_date = dplyr::first(fallow_start),
+      .groups = "drop"
+    )
+
+  # One row per fallow window: min-day SD / neighbor counts (was rowwise +
+  # get_metric_details per row; res_max branch was never selected - dropped).
+  smooth <- dplyr::distinct(dplyr::as_tibble(ndti_smooth), parcel_id, date, .keep_all = TRUE)
+  rel <- dplyr::filter(smooth, !is.na(n_valid), n_valid > 0)
+
+  q <- data.frame(
+    parcel_id = base_metrics$parcel_id,
+    target_date = as.Date(base_metrics$minNDTI_date),
+    stringsAsFactors = FALSE
+  )
+  row_t <- match(
+    paste(q$parcel_id, as.integer(q$target_date), sep = "\r"),
+    paste(smooth$parcel_id, as.integer(smooth$date), sep = "\r")
+  )
+  n_on <- rep(0L, nrow(q))
+  okm <- !is.na(row_t)
+  if (any(okm)) {
+    nvv <- smooth$n_valid[row_t[okm]]
+    n_on[okm] <- as.integer(ifelse(is.na(nvv), 0L, nvv))
+  }
+  tgt_sd <- rep(NA_real_, nrow(q))
+  tgt_sd[okm] <- smooth$ndti_sd[row_t[okm]]
+
+  nb <- .min_ndti_neighbor_details(rel, q)
+  sd_out <- ifelse(n_on > 0, tgt_sd, NA_real_)
+  w0 <- which(n_on == 0L)
+  if (length(w0) > 0L) {
+    nv <- ifelse(is.na(nb$prev_nv[w0]), 0, nb$prev_nv[w0]) +
+      ifelse(is.na(nb$foll_nv[w0]), 0, nb$foll_nv[w0])
+    ss <- ifelse(is.na(nb$prev_ss[w0]), 0, nb$prev_ss[w0]) +
+      ifelse(is.na(nb$foll_ss[w0]), 0, nb$foll_ss[w0])
+    pool <- sqrt(ss / nv)
+    pool[nv <= 0 | !is.finite(pool)] <- NA_real_
+    sd_out[w0] <- pool
+  }
+
+  dplyr::transmute(
+    base_metrics,
+    parcel_id,
+    year,
+    PFT,
+    OGMn_date,
+    max_date = maxNDTI_pre_date,
+    max_ndti = maxNDTI_pre_min,
+    min_date = minNDTI_date,
+    min_ndti = ndti_on_minNDTI,
+    min_n_valid = n_on,
+    min_sd = sd_out,
+    ndti_pct_change,
+    min_val_date_before = nb$prev_date,
+    min_val_n_before = nb$prev_nv,
+    min_val_date_after = nb$foll_date,
+    min_val_n_after = nb$foll_nv
+  )
+}
diff --git a/modules/data.remote/inst/ccmmf/traits/build_harvest_lookup.R b/modules/data.remote/inst/ccmmf/traits/build_harvest_lookup.R
new file mode 100644
index 00000000000..b7aa2166fa4
--- /dev/null
+++ b/modules/data.remote/inst/ccmmf/traits/build_harvest_lookup.R
@@ -0,0 +1,121 @@
+# Build long-format harvest removal / litter fraction lookup for CCMMF.
+#
+# Starts from agricultural LandIQ codes with a PFT. Uses placeholder fractions
+# by PFT (row, rice, hay, woody, woody_destructive) for AGB_REMOVED, AGB_LITTER,
+# BGB_REMOVED, BGB_LITTER. Duplicates every woody row as woody_destructive so
+# orchards can use a different harvest scenario. Stacks subclass, class, PFT,
+# and global levels so pool_calculations can use the same fallback order as traits.
+#
+# Writes plant_traits/harvest_lookup_long.rds and .csv under CCMMF_MANAGEMENT.
+#
+# Run from management repo root:
+#   Rscript scripts/traits/build_harvest_lookup.R
+# Env: CCMMF_MANAGEMENT
+
+#### Load packages
+
+suppressPackageStartupMessages({
+  library(data.table)
+  library(dplyr)
+  library(readr)
+  library(tibble)
+  library(tidyr)
+})
+
+#### Paths and constants (override root with CCMMF_MANAGEMENT)
+
+path_management <- Sys.getenv("CCMMF_MANAGEMENT", "/projectnb/dietzelab/ccmmf/management")
+landiq_lookup_csv <- file.path(path_management, "LandIQ_cropCode_lookup_table.csv")
+plant_traits_dir <- file.path(path_management, "plant_traits")
+out_rds <- file.path(plant_traits_dir, "harvest_lookup_long.rds")
+out_csv <- file.path(plant_traits_dir, "harvest_lookup_long.csv")
+
+# Agricultural LandIQ rows only; need PFT (same filter as build_planting_lookup.R).
+load_landiq_mapping <- function(path = landiq_lookup_csv) {
+  d <- as.data.frame(fread(path))
+  d %>%
+    mutate(SUBCLASS = as.character(SUBCLASS)) %>%
+    filter(is_agricultural == TRUE, !is.na(PFT)) %>%
+    transmute(class = CLASS, subclass = SUBCLASS, crop_desc = SUBCLASS_desc, class_desc = CLASS_desc, PFT = PFT)
+}
+
+# Until field-derived fractions exist, these are fixed means; sd_obs NA, n_obs 0.
+placeholder_means <- tibble::tribble(
+  ~PFT,                  ~AGB_REMOVED, ~AGB_LITTER, ~BGB_REMOVED, ~BGB_LITTER,
+  "row",                       0.80,       0.20,         0.00,        1.00,
+  "rice",                      0.80,       0.20,         0.00,        1.00,
+  "hay",                       0.75,       0.15,         0.00,        0.00,
+  "woody",                     0.15,       0.015,        0.00,        0.00,
+  "woody_destructive",         0.80,       0.20,         0.00,        1.00
+)
+
+#### Build tables by level
+
+dir.create(plant_traits_dir, recursive = TRUE, showWarnings = FALSE)
+
+cat("Loading LandIQ mapping (agricultural classes only)...\n")
+mapping <- load_landiq_mapping(landiq_lookup_csv)
+
+subclass_valid <- mapping %>%
+  distinct(class, subclass, crop_desc, PFT)
+
+# Class level uses CLASS_desc as the human-readable label (subclass left blank).
+class_valid <- mapping %>%
+  distinct(level_id = class, class_desc, PFT) %>%
+  mutate(class = level_id, crop_desc = class_desc, subclass = NA_character_) %>%
+  select(level_id, class, subclass, crop_desc, PFT)
+
+# Harvest lookup distinguishes full orchard removal (woody_destructive) from partial woody harvest.
+woody_sub <- subclass_valid %>% filter(PFT == "woody") %>% mutate(PFT = "woody_destructive")
+subclass_valid <- bind_rows(subclass_valid, woody_sub)
+
+woody_cls <- class_valid %>% filter(PFT == "woody") %>% mutate(PFT = "woody_destructive")
+class_valid <- bind_rows(class_valid, woody_cls)
+
+placeholders <- placeholder_means %>%
+  pivot_longer(-PFT, names_to = "param", values_to = "mean_obs") %>%
+  mutate(sd_obs = NA_real_, n_obs = 0L)
+
+tbl_sub <- subclass_valid %>%
+  left_join(placeholders, by = "PFT", relationship = "many-to-many") %>%
+  mutate(level = "subclass")
+
+tbl_class <- class_valid %>%
+  select(-level_id) %>%
+  left_join(placeholders, by = "PFT", relationship = "many-to-many") %>%
+  mutate(level = "class")
+
+# PFT-only rows: no spatial code; used when subclass and class both miss.
+tbl_pft <- placeholders %>%
+  mutate(
+    level = "pft",
+    class = NA_character_,
+    subclass = NA_character_,
+    crop_desc = NA_character_
+  )
+
+# Global row per param: mean of the placeholder PFT values (still placeholders).
+tbl_global <- placeholders %>%
+  group_by(param) %>%
+  summarise(
+    mean_obs = mean(mean_obs, na.rm = TRUE),
+    sd_obs = NA_real_,
+    n_obs = 0L,
+    .groups = "drop"
+  ) %>%
+  mutate(
+    level = "global",
+    class = NA_character_,
+    subclass = NA_character_,
+    crop_desc = NA_character_,
+    PFT = NA_character_
+  )
+
+harvest_lookup_long <- bind_rows(tbl_sub, tbl_class, tbl_pft, tbl_global) %>%
+  relocate(level, PFT, class, subclass, crop_desc, param, mean_obs, sd_obs, n_obs) %>%
+  arrange(level, PFT, class, subclass, param)
+
+#### Write outputs
+
+saveRDS(harvest_lookup_long, out_rds)
+write_csv(harvest_lookup_long, out_csv)
\ No newline at end of file
diff --git a/modules/data.remote/inst/ccmmf/traits/build_harvest_lookup_faostat.R b/modules/data.remote/inst/ccmmf/traits/build_harvest_lookup_faostat.R
new file mode 100644
index 00000000000..6b961b13a8b
--- /dev/null
+++ b/modules/data.remote/inst/ccmmf/traits/build_harvest_lookup_faostat.R
@@ -0,0 +1,665 @@
+# Build long-format harvest lookup from FAOSTAT-style crop statistics plus LandIQ codes.
+# Joins each FAOSTAT item to LandIQ crop_desc, aggregates DRYAD-style variables at subclass,
+# then class x PFT, then PFT, then global, and derives harvest removal fractions (mass-balanced
+# AGB using HI and yield, or clamped yield). Row levels follow coalesce(subclass, class, pft,
+# placeholder, global) so pool_calculations_from_lookup.R can fall back the same way as
+# build_harvest_lookup.R. If the Excel file is missing, uses HARVEST_PFT_SUMMARY_CSV only.
+#
+# Main inputs: CCMMF_MANAGEMENT; HARVEST_FAOSTAT_XLSX (columns item, variable, value);
+#   HARVEST_PFT_SUMMARY_CSV for PFT-only fallback; LandIQ_cropCode_lookup_table.csv.
+# Main outputs: plant_traits/harvest_lookup_long_faostat.rds and .csv (does not overwrite harvest_lookup_long.rds).
+# How to run: Rscript scripts/traits/build_harvest_lookup_faostat.R from repo root (set CCMMF_MANAGEMENT).
+# Workflow: optional harvest lookup for events; set HARVEST_LOOKUP_RDS to this RDS in make_events_statewide.R.
+
+#### Load packages
+
+suppressPackageStartupMessages({
+  library(data.table)
+  library(dplyr)
+  library(readr)
+  library(readxl)
+  library(stringr)
+  library(tibble)
+  library(tidyr)
+})
+
+#### Paths and constants
+
+path_management <- Sys.getenv("CCMMF_MANAGEMENT", "/projectnb/dietzelab/ccmmf/management")
+landiq_lookup_csv <- file.path(path_management, "LandIQ_cropCode_lookup_table.csv")
+plant_traits_dir <- file.path(path_management, "plant_traits")
+out_rds <- file.path(plant_traits_dir, "harvest_lookup_long_faostat.rds")
+out_csv <- file.path(plant_traits_dir, "harvest_lookup_long_faostat.csv")
+
+faostat_xlsx <- Sys.getenv(
+  "HARVEST_FAOSTAT_XLSX",
+  "/projectnb/dietzelab/mkim/Harvest Data/new_output.xlsx"
+)
+pft_summary_csv <- Sys.getenv(
+  "HARVEST_PFT_SUMMARY_CSV",
+  "/projectnb/dietzelab/mkim/Harvest Data/harvest_pools_output.csv"
+)
+
+rs_root_shoot <- 0.20
+
+harvest_param_names <- c("AGB_REMOVED", "AGB_LITTER", "BGB_REMOVED", "BGB_LITTER")
+
+placeholder_means_wide <- tibble::tribble(
+  ~PFT,                  ~AGB_REMOVED, ~AGB_LITTER, ~BGB_REMOVED, ~BGB_LITTER,
+  "row",                       0.80,       0.20,         0.00,        1.00,
+  "rice",                      0.80,       0.20,         0.00,        1.00,
+  "hay",                       0.75,       0.15,         0.00,        0.00,
+  "woody",                     0.15,       0.015,        0.00,        0.00,
+  "woody_destructive",         0.80,       0.20,         0.00,        1.00
+)
+
+req_var_names <- c(
+  "Maximum_Above_ground_biomass_kg_DM_ha",
+  "Mean_Grain_yield_kg_DM_ha",
+  "HI",
+  "CR_removed_pc",
+  "Mean_Above_ground_biomass_kg_DM_ha",
+  "Yield_kg_DM_ha",
+  "Mean_Harvest_index",
+  "Ratio_residues_removed_from_field"
+)
+
+#### LandIQ load and FAOSTAT item to crop_desc mapping
+
+# Agricultural LandIQ rows only; need PFT (same as build_planting_lookup / harvest).
+load_landiq_mapping <- function(path = landiq_lookup_csv) {
+  d <- as.data.frame(fread(path))
+  d %>%
+    mutate(SUBCLASS = as.character(SUBCLASS)) %>%
+    filter(is_agricultural == TRUE, !is.na(PFT)) %>%
+    transmute(class = CLASS, subclass = SUBCLASS, crop_desc = SUBCLASS_desc, class_desc = CLASS_desc, PFT = PFT)
+}
+
+norm_item <- function(x) {
+  x <- tolower(trimws(as.character(x)))
+  x[!nzchar(x) | is.na(x)] <- NA_character_
+  x
+}
+
+# Exact LandIQ SUBCLASS_desc strings for FAOSTAT items that do not match automatically.
+manual_item_to_crop <- tibble::tibble(
+  item_clean = norm_item(c(
+    "maize", "rice, paddy", "soybeans", "wheat", "barley", "oats", "sunflower seed",
+    "rapeseed", "cotton", "sugar beets", "potatoes", "tomatoes", "bananas",
+    "cassava", "coconuts", "triticale", "lentils", "sesame seed", "tea",
+    "tobacco, unmanufactured", "vetches", "alfalfa"
+  )),
+  crop_desc = c(
+    "Corn (field & sweet)",
+    "Rice",
+    "Beans (dry)",
+    "Wheat",
+    "Barley",
+    "Oats",
+    "Sunflowers",
+    "Safflower",
+    "Cotton",
+    "Sugar beets",
+    "Potatoes",
+    "Tomatoes (processing)",
+    "Miscellaneous deciduous",
+    "Miscellaneous field",
+    "Miscellaneous deciduous",
+    "Miscellaneous grain and hay",
+    "Beans (dry)",
+    "Castor beans",
+    "Miscellaneous field",
+    "Miscellaneous field",
+    "Beans (dry)",
+    "Alfalfa and alfalfa mixtures"
+  )
+)
+
+map_item_to_landiq <- function(item_clean, codes_tbl, manual_tbl) {
+  if (is.na(item_clean)) {
+    return(tibble::tibble(
+      crop_desc = NA_character_, class = NA_character_,
+      subclass = NA_character_, PFT = NA_character_
+    ))
+  }
+  hit <- manual_tbl %>% dplyr::filter(.data$item_clean == item_clean)
+  if (nrow(hit) == 1L) {
+    m <- codes_tbl %>% dplyr::filter(.data$crop_desc == hit$crop_desc[1])
+    if (nrow(m) >= 1L) {
+      return(m %>% dplyr::slice(1) %>% dplyr::select(crop_desc, class, subclass, PFT))
+    }
+  }
+  hits <- codes_tbl %>% dplyr::filter(.data$norm_crop == item_clean)
+  if (nrow(hits) == 1L) {
+    return(hits %>% dplyr::select(crop_desc, class, subclass, PFT))
+  }
+  if (nrow(hits) > 1L) {
+    return(hits %>% dplyr::arrange(nchar(.data$crop_desc)) %>% dplyr::slice(1) %>%
+      dplyr::select(crop_desc, class, subclass, PFT))
+  }
+  hits <- codes_tbl %>% dplyr::filter(stringr::str_detect(.data$norm_crop, stringr::fixed(item_clean)))
+  if (nrow(hits) >= 1L) {
+    return(hits %>% dplyr::arrange(nchar(.data$crop_desc)) %>% dplyr::slice(1) %>%
+      dplyr::select(crop_desc, class, subclass, PFT))
+  }
+  words <- unlist(stringr::str_split(item_clean, "\\s+"))
+  words <- words[nzchar(words)]
+  if (length(words) > 0L) {
+    patt <- paste0("\\b(", paste(stringr::str_escape(words), collapse = "|"), ")\\b")
+    hits <- codes_tbl %>% dplyr::filter(stringr::str_detect(.data$norm_crop, patt))
+    if (nrow(hits) >= 1L) {
+      return(hits %>% dplyr::arrange(nchar(.data$crop_desc)) %>% dplyr::slice(1) %>%
+        dplyr::select(crop_desc, class, subclass, PFT))
+    }
+  }
+  tibble::tibble(
+    crop_desc = NA_character_, class = NA_character_,
+    subclass = NA_character_, PFT = NA_character_
+  )
+}
+
+#### Harvest fractions from wide trait row (HI, yield, residue)
+
+harvest_fractions_from_wide <- function(df_wide, pft_for_placeholder, cr_fallback) {
+  for (cn in req_var_names) {
+    if (!cn %in% names(df_wide)) {
+      df_wide[[cn]] <- NA_real_
+    }
+  }
+  cr_use <- suppressWarnings(as.numeric(df_wide$CR_removed_pc[1]))
+  if (is.na(cr_use)) {
+    cr_use <- cr_fallback
+  }
+  cr <- dplyr::coalesce(
+    suppressWarnings(as.numeric(df_wide$CR_removed_pc[1])),
+    suppressWarnings(as.numeric(df_wide$Ratio_residues_removed_from_field[1])),
+    cr_use
+  )
+  if (is.na(cr) || !is.finite(cr)) {
+    cr <- 0
+  }
+  cr <- pmin(pmax(cr, 0), 100)
+
+  agb <- suppressWarnings(as.numeric(df_wide$Mean_Above_ground_biomass_kg_DM_ha[1]))
+  if (is.na(agb) || !is.finite(agb) || agb <= 0) {
+    agb <- suppressWarnings(as.numeric(df_wide$Maximum_Above_ground_biomass_kg_DM_ha[1]))
+  }
+  hi_raw <- dplyr::coalesce(
+    suppressWarnings(as.numeric(df_wide$HI[1])),
+    suppressWarnings(as.numeric(df_wide$Mean_Harvest_index[1]))
+  )
+  yield_obs <- dplyr::coalesce(
+    suppressWarnings(as.numeric(df_wide$Yield_kg_DM_ha[1])),
+    suppressWarnings(as.numeric(df_wide$Mean_Grain_yield_kg_DM_ha[1]))
+  )
+
+  yield_use <- NA_real_
+  residue <- NA_real_
+  if (!is.na(hi_raw) && is.finite(hi_raw)) {
+    hi_c <- pmin(pmax(hi_raw, 0), 1)
+    yield_use <- hi_c * agb
+    residue <- agb - yield_use
+  } else if (!is.na(yield_obs) && is.finite(yield_obs) && !is.na(agb) && agb > 0) {
+    yield_use <- pmin(pmax(yield_obs, 0), agb)
+    residue <- agb - yield_use
+  }
+
+  ph <- placeholder_means_wide %>% dplyr::filter(.data$PFT == pft_for_placeholder)
+  if (nrow(ph) != 1L) {
+    ph <- placeholder_means_wide %>% dplyr::filter(.data$PFT == "row")
+  }
+
+  if (is.na(agb) || !is.finite(agb) || agb <= 0 || is.na(yield_use) || is.na(residue)) {
+    return(c(
+      AGB_REMOVED = ph$AGB_REMOVED[1], AGB_LITTER = ph$AGB_LITTER[1],
+      BGB_REMOVED = ph$BGB_REMOVED[1], BGB_LITTER = ph$BGB_LITTER[1],
+      source = "placeholder"
+    ))
+  }
+
+  agb_rm <- yield_use + residue * (cr / 100)
+  agb_lit <- residue * (1 - cr / 100)
+  c(
+    AGB_REMOVED = agb_rm / agb, AGB_LITTER = agb_lit / agb,
+    BGB_REMOVED = 0, BGB_LITTER = 1,
+    source = "calculated"
+  )
+}
+
+agg_to_fraction_row <- function(agg_slice, pft_label, cr_fb) {
+  if (nrow(agg_slice) == 0L) {
+    return(NULL)
+  }
+  wide <- agg_slice %>%
+    dplyr::select(variable, value) %>%
+    tidyr::pivot_wider(names_from = variable, values_from = value)
+  v <- harvest_fractions_from_wide(wide, pft_label, cr_fb)
+  tibble::as_tibble(as.list(v))
+}
+
+#### Read FAOSTAT Excel and PFT summary CSV fallback
+
+read_faostat_excel <- function(path_xlsx) {
+  if (!nzchar(path_xlsx) || !file.exists(path_xlsx)) {
+    return(NULL)
+  }
+  tryCatch(
+    readxl::read_excel(path_xlsx),
+    error = function(e) {
+      warning("Could not read HARVEST_FAOSTAT_XLSX: ", conditionMessage(e))
+      NULL
+    }
+  )
+}
+
+try_pft_summary_csv <- function(path_csv) {
+  if (!nzchar(path_csv) || !file.exists(path_csv)) {
+    return(tibble::tibble())
+  }
+  raw <- readr::read_csv(path_csv, show_col_types = FALSE)
+  pc_cols <- c("AGB_REMOVED_pc", "AGB_LITTER_pc", "BGB_REMOVED_pc", "BGB_LITTER_pc")
+  if (!all(pc_cols %in% names(raw))) {
+    stop("HARVEST_PFT_SUMMARY_CSV must contain columns: ", paste(pc_cols, collapse = ", "))
+  }
+  raw %>%
+    dplyr::mutate(
+      AGB_REMOVED = .data$AGB_REMOVED_pc / 100,
+      AGB_LITTER = .data$AGB_LITTER_pc / 100,
+      BGB_REMOVED = .data$BGB_REMOVED_pc / 100,
+      BGB_LITTER = .data$BGB_LITTER_pc / 100,
+      n_crops = if ("n_crops" %in% names(raw)) as.integer(.data$n_crops) else 0L,
+      source = if ("source" %in% names(raw)) as.character(.data$source) else "csv"
+    ) %>%
+    dplyr::select(PFT, AGB_REMOVED, AGB_LITTER, BGB_REMOVED, BGB_LITTER, n_crops, source)
+}
+
+merge_pft_with_placeholders <- function(summary_tbl) {
+  ph <- placeholder_means_wide %>% dplyr::mutate(PFT = as.character(PFT))
+  if (nrow(summary_tbl) == 0) {
+    return(ph %>% dplyr::mutate(n_crops = 0L, source = "placeholder"))
+  }
+  keyed <- summary_tbl %>%
+    dplyr::mutate(PFT = as.character(PFT)) %>%
+    dplyr::rename(
+      AGB_REMOVED_f = AGB_REMOVED,
+      AGB_LITTER_f = AGB_LITTER,
+      BGB_REMOVED_f = BGB_REMOVED,
+      BGB_LITTER_f = BGB_LITTER
+    )
+  ph %>%
+    dplyr::left_join(keyed, by = "PFT") %>%
+    dplyr::mutate(
+      AGB_REMOVED = dplyr::coalesce(.data$AGB_REMOVED_f, .data$AGB_REMOVED),
+      AGB_LITTER = dplyr::coalesce(.data$AGB_LITTER_f, .data$AGB_LITTER),
+      BGB_REMOVED = dplyr::coalesce(.data$BGB_REMOVED_f, .data$BGB_REMOVED),
+      BGB_LITTER = dplyr::coalesce(.data$BGB_LITTER_f, .data$BGB_LITTER),
+      n_crops = dplyr::coalesce(as.integer(.data$n_crops), 0L),
+      source = dplyr::coalesce(.data$source, "placeholder")
+    ) %>%
+    dplyr::select(PFT, AGB_REMOVED, AGB_LITTER, BGB_REMOVED, BGB_LITTER, n_crops, source)
+}
+
+#### LandIQ subclass and class frames (woody vs woody_destructive)
+
+# Same woody / woody_destructive duplication as build_harvest_lookup.R.
+landiq_subclass_class_frames <- function(mapping_df) {
+  subclass_valid <- mapping_df %>% dplyr::distinct(class, subclass, crop_desc, PFT)
+  woody_sub <- subclass_valid %>% dplyr::filter(PFT == "woody") %>% dplyr::mutate(PFT = "woody_destructive")
+  subclass_valid <- dplyr::bind_rows(subclass_valid, woody_sub)
+  class_valid <- mapping_df %>%
+    dplyr::distinct(level_id = class, class_desc, PFT) %>%
+    dplyr::mutate(class = level_id, crop_desc = class_desc, subclass = NA_character_) %>%
+    dplyr::select(level_id, class, subclass, crop_desc, PFT)
+  woody_cls <- class_valid %>% dplyr::filter(PFT == "woody") %>% dplyr::mutate(PFT = "woody_destructive")
+  class_valid <- dplyr::bind_rows(class_valid, woody_cls)
+  list(subclass_valid = subclass_valid, class_valid = class_valid)
+}
+
+# Long harvest params from wide PFT table (CSV path or internal); same shape as build_harvest_lookup join.
+pft_wide_to_placeholders_long <- function(pft_wide_tbl) {
+  pft_wide_tbl %>%
+    tidyr::pivot_longer(
+      c(AGB_REMOVED, AGB_LITTER, BGB_REMOVED, BGB_LITTER),
+      names_to = "param",
+      values_to = "mean_obs"
+    ) %>%
+    dplyr::mutate(
+      sd_obs = NA_real_,
+      n_obs = as.integer(ifelse(.data$source == "placeholder" | is.na(.data$n_crops), 0L, .data$n_crops))
+    ) %>%
+    dplyr::select(PFT, param, mean_obs, sd_obs, n_obs)
+}
+
+# Stack subclass / class / pft / global from PFT-only long rows (CSV fallback = build_harvest_lookup.R layout).
+harvest_long_from_pft_placeholders <- function(mapping_df, placeholders_long) {
+  skel <- landiq_subclass_class_frames(mapping_df)
+  tbl_sub <- skel$subclass_valid %>%
+    dplyr::left_join(placeholders_long, by = "PFT", relationship = "many-to-many") %>%
+    dplyr::mutate(level = "subclass")
+  tbl_class <- skel$class_valid %>%
+    dplyr::select(-level_id) %>%
+    dplyr::left_join(placeholders_long, by = "PFT", relationship = "many-to-many") %>%
+    dplyr::mutate(level = "class")
+  tbl_pft <- placeholders_long %>%
+    dplyr::mutate(level = "pft", class = NA_character_, subclass = NA_character_, crop_desc = NA_character_)
+  tbl_global <- placeholders_long %>%
+    dplyr::group_by(param) %>%
+    dplyr::summarise(mean_obs = mean(mean_obs, na.rm = TRUE), sd_obs = NA_real_, n_obs = 0L, .groups = "drop") %>%
+    dplyr::mutate(
+      level = "global",
+      class = NA_character_,
+      subclass = NA_character_,
+      crop_desc = NA_character_,
+      PFT = NA_character_
+    )
+  dplyr::bind_rows(tbl_sub, tbl_class, tbl_pft, tbl_global) %>%
+    dplyr::relocate(level, PFT, class, subclass, crop_desc, param, mean_obs, sd_obs, n_obs) %>%
+    dplyr::arrange(level, PFT, class, subclass, param)
+}
+
+write_harvest_lookup_outputs <- function(harvest_lookup_long, rds_path, csv_path) {
+  saveRDS(harvest_lookup_long, rds_path)
+  readr::write_csv(harvest_lookup_long, csv_path)
+  cat("Wrote ", rds_path, "\n", sep = "")
+  cat("Wrote ", csv_path, "\n", sep = "")
+}
+
+#### Create output directory
+
+dir.create(plant_traits_dir, recursive = TRUE, showWarnings = FALSE)
+
+#### Load LandIQ
+
+cat("Loading LandIQ mapping (agricultural classes only)...\n")
+mapping <- load_landiq_mapping(landiq_lookup_csv)
+codes <- mapping %>%
+  dplyr::distinct(crop_desc, class, subclass, PFT) %>%
+  dplyr::mutate(norm_crop = norm_item(.data$crop_desc))
+
+harvest_tbl <- read_faostat_excel(faostat_xlsx)
+use_excel <- !is.null(harvest_tbl) &&
+  all(c("item", "variable", "value") %in% names(harvest_tbl))
+
+if (!use_excel) {
+  cat("No usable FAOSTAT Excel; using PFT summary CSV path.\n")
+  summary_csv <- try_pft_summary_csv(pft_summary_csv)
+  if (nrow(summary_csv) == 0) {
+    stop("Need HARVEST_FAOSTAT_XLSX with item/variable/value or HARVEST_PFT_SUMMARY_CSV.")
+  }
+  pft_wide <- merge_pft_with_placeholders(summary_csv)
+  pl <- pft_wide_to_placeholders_long(pft_wide)
+  harvest_lookup_long <- harvest_long_from_pft_placeholders(mapping, pl)
+  write_harvest_lookup_outputs(harvest_lookup_long, out_rds, out_csv)
+  quit(save = "no", status = 0)
+}
+
+cat("Joining FAOSTAT items to LandIQ crop_desc (planting-style identity)...\n")
+
+cr_fb <- NA_real_
+if ("original_crop" %in% names(harvest_tbl)) {
+  cr_fb <- harvest_tbl %>%
+    dplyr::filter(.data$original_crop == "All_crops", .data$variable == "CR_removed_pc") %>%
+    dplyr::summarise(v = mean(as.numeric(.data$value), na.rm = TRUE)) %>%
+    dplyr::pull(v)
+}
+
+traits_long <- harvest_tbl %>%
+  dplyr::mutate(
+    item_clean = norm_item(.data$item),
+    value_num = suppressWarnings(as.numeric(.data$value))
+  ) %>%
+  dplyr::filter(!is.na(.data$item_clean), !is.na(.data$variable))
+
+uniq_items <- unique(traits_long$item_clean)
+item_rows <- vector("list", length(uniq_items))
+for (ii in seq_along(uniq_items)) {
+  item_rows[[ii]] <- map_item_to_landiq(uniq_items[ii], codes, manual_item_to_crop) %>%
+    dplyr::mutate(item_clean = uniq_items[ii])
+}
+item_map <- dplyr::bind_rows(item_rows)
+
+n_mapped <- sum(!is.na(item_map$class))
+cat("  Mapped ", n_mapped, " of ", nrow(item_map), " distinct items to a LandIQ subclass.\n", sep = "")
+
+harvest_join <- traits_long %>%
+  dplyr::left_join(item_map, by = "item_clean") %>%
+  dplyr::filter(!is.na(.data$class), !is.na(.data$subclass))
+
+cat("  Trait rows after LandIQ join: ", nrow(harvest_join), "\n", sep = "")
+
+if (nrow(harvest_join) == 0L) {
+  stop(
+    "No FAOSTAT rows joined to LandIQ (check item names vs SUBCLASS_desc and manual_item_to_crop). ",
+    "Or use HARVEST_PFT_SUMMARY_CSV only by removing/renaming the Excel path."
+  )
+}
+
+#### Summarize at subclass (mean per variable, like TRY to subclass)
+
+sub_agg <- harvest_join %>%
+  dplyr::group_by(class, subclass, crop_desc, PFT, variable) %>%
+  dplyr::summarise(
+    value = mean(.data$value_num, na.rm = TRUE),
+    n_obs = dplyr::n(),
+    .groups = "drop"
+  )
+
+sub_keys <- sub_agg %>%
+  dplyr::distinct(class, subclass, crop_desc, PFT)
+
+cat("Computing harvest fractions per subclass key...\n")
+sub_frac_list <- vector("list", nrow(sub_keys))
+for (i in seq_len(nrow(sub_keys))) {
+  k <- sub_keys[i, , drop = FALSE]
+  sl <- sub_agg %>%
+    dplyr::filter(
+      .data$class == k$class[1], .data$subclass == k$subclass[1],
+      .data$crop_desc == k$crop_desc[1], .data$PFT == k$PFT[1]
+    )
+  fr <- agg_to_fraction_row(sl, k$PFT[1], cr_fb)
+  if (!is.null(fr)) {
+    sub_frac_list[[i]] <- dplyr::bind_cols(k, fr)
+  }
+}
+frac_sub <- dplyr::bind_rows(sub_frac_list)
+
+#### Summarize at class x PFT (re-aggregate raw rows, like planting class level)
+
+class_agg <- harvest_join %>%
+  dplyr::group_by(class, PFT, variable) %>%
+  dplyr::summarise(value = mean(.data$value_num, na.rm = TRUE), n_obs = dplyr::n(), .groups = "drop")
+
+class_keys <- class_agg %>% dplyr::distinct(class, PFT)
+
+cat("Computing harvest fractions per class x PFT...\n")
+class_frac_list <- vector("list", nrow(class_keys))
+for (i in seq_len(nrow(class_keys))) {
+  k <- class_keys[i, , drop = FALSE]
+  sl <- class_agg %>%
+    dplyr::filter(.data$class == k$class[1], .data$PFT == k$PFT[1])
+  fr <- agg_to_fraction_row(sl, k$PFT[1], cr_fb)
+  if (!is.null(fr)) {
+    class_frac_list[[i]] <- dplyr::bind_cols(
+      tibble::tibble(class = k$class[1], PFT = k$PFT[1]),
+      fr
+    )
+  }
+}
+frac_class <- dplyr::bind_rows(class_frac_list)
+
+#### Summarize at PFT and global
+
+pft_agg <- harvest_join %>%
+  dplyr::group_by(PFT, variable) %>%
+  dplyr::summarise(value = mean(.data$value_num, na.rm = TRUE), n_obs = dplyr::n(), .groups = "drop")
+
+pft_keys <- pft_agg %>% dplyr::distinct(PFT)
+
+cat("Computing harvest fractions per PFT...\n")
+pft_frac_list <- vector("list", nrow(pft_keys))
+for (i in seq_len(nrow(pft_keys))) {
+  pf <- pft_keys$PFT[i]
+  sl <- pft_agg %>% dplyr::filter(.data$PFT == pf)
+  fr <- agg_to_fraction_row(sl, pf, cr_fb)
+  if (!is.null(fr)) {
+    pft_frac_list[[i]] <- dplyr::bind_cols(tibble::tibble(PFT = pf), fr)
+  }
+}
+frac_pft <- dplyr::bind_rows(pft_frac_list)
+
+glob_agg <- harvest_join %>%
+  dplyr::group_by(variable) %>%
+  dplyr::summarise(value = mean(.data$value_num, na.rm = TRUE), .groups = "drop")
+dom_pft <- names(sort(table(harvest_join$PFT), decreasing = TRUE))[1]
+if (is.na(dom_pft)) {
+  dom_pft <- "row"
+}
+fr_g <- agg_to_fraction_row(
+  glob_agg %>% dplyr::transmute(variable, value),
+  dom_pft,
+  cr_fb
+)
+if (is.null(fr_g) || nrow(fr_g) == 0L) {
+  stop("Global harvest fraction row is empty; check FAOSTAT variables.")
+}
+fr_g_num <- function(nm) suppressWarnings(as.numeric(fr_g[[nm]][1]))
+
+ph_wide <- placeholder_means_wide %>% dplyr::mutate(PFT = as.character(PFT))
+ph_long <- ph_wide %>%
+  tidyr::pivot_longer(c(AGB_REMOVED, AGB_LITTER, BGB_REMOVED, BGB_LITTER),
+    names_to = "param", values_to = "ph"
+  )
+
+frac_sub_l <- frac_sub %>%
+  tidyr::pivot_longer(c(AGB_REMOVED, AGB_LITTER, BGB_REMOVED, BGB_LITTER),
+    names_to = "param", values_to = "v_sub"
+  ) %>%
+  dplyr::select(class, subclass, crop_desc, PFT, param, v_sub)
+frac_class_l <- frac_class %>%
+  tidyr::pivot_longer(c(AGB_REMOVED, AGB_LITTER, BGB_REMOVED, BGB_LITTER),
+    names_to = "param", values_to = "v_class"
+  ) %>%
+  dplyr::select(class, PFT, param, v_class)
+frac_pft_l <- frac_pft %>%
+  tidyr::pivot_longer(c(AGB_REMOVED, AGB_LITTER, BGB_REMOVED, BGB_LITTER),
+    names_to = "param", values_to = "v_pft"
+  ) %>%
+  dplyr::select(PFT, param, v_pft)
+
+g_vals <- tibble::tibble(
+  param = harvest_param_names,
+  v_glob = c(
+    fr_g_num("AGB_REMOVED"), fr_g_num("AGB_LITTER"),
+    fr_g_num("BGB_REMOVED"), fr_g_num("BGB_LITTER")
+  )
+)
+
+#### Assemble long lookup (coalesce subclass, class, pft, placeholder, global)
+
+skel <- landiq_subclass_class_frames(mapping)
+
+tbl_sub <- skel$subclass_valid %>%
+  tidyr::crossing(param = harvest_param_names) %>%
+  dplyr::left_join(frac_sub_l, by = c("class", "subclass", "crop_desc", "PFT", "param")) %>%
+  dplyr::left_join(frac_class_l, by = c("class", "PFT", "param")) %>%
+  dplyr::left_join(frac_pft_l, by = c("PFT", "param")) %>%
+  dplyr::left_join(g_vals, by = "param") %>%
+  dplyr::left_join(ph_long, by = c("PFT", "param")) %>%
+  dplyr::mutate(
+    # Placeholder before global so woody_destructive does not take the global cereal mean.
+    mean_obs = dplyr::coalesce(
+      suppressWarnings(as.numeric(.data$v_sub)),
+      suppressWarnings(as.numeric(.data$v_class)),
+      suppressWarnings(as.numeric(.data$v_pft)),
+      suppressWarnings(as.numeric(.data$ph)),
+      suppressWarnings(as.numeric(.data$v_glob))
+    ),
+    src = dplyr::case_when(
+      !is.na(.data$v_sub) ~ "subclass",
+      !is.na(.data$v_class) ~ "class",
+      !is.na(.data$v_pft) ~ "pft",
+      !is.na(.data$v_glob) ~ "global",
+      TRUE ~ "placeholder"
+    ),
+    sd_obs = NA_real_,
+    n_obs = 0L,
+    level = "subclass"
+  ) %>%
+  dplyr::select(level, PFT, class, subclass, crop_desc, param, mean_obs, sd_obs, n_obs, src)
+
+tbl_class <- skel$class_valid %>%
+  dplyr::select(-level_id) %>%
+  tidyr::crossing(param = harvest_param_names) %>%
+  dplyr::left_join(frac_class_l, by = c("class", "PFT", "param")) %>%
+  dplyr::left_join(frac_pft_l, by = c("PFT", "param")) %>%
+  dplyr::left_join(g_vals, by = "param") %>%
+  dplyr::left_join(ph_long, by = c("PFT", "param")) %>%
+  dplyr::mutate(
+    mean_obs = dplyr::coalesce(
+      suppressWarnings(as.numeric(.data$v_class)),
+      suppressWarnings(as.numeric(.data$v_pft)),
+      suppressWarnings(as.numeric(.data$ph)),
+      suppressWarnings(as.numeric(.data$v_glob))
+    ),
+    src = dplyr::case_when(
+      !is.na(.data$v_class) ~ "class",
+      !is.na(.data$v_pft) ~ "pft",
+      !is.na(.data$v_glob) ~ "global",
+      TRUE ~ "placeholder"
+    ),
+    sd_obs = NA_real_,
+    n_obs = 0L,
+    level = "class"
+  ) %>%
+  dplyr::select(level, PFT, class, subclass, crop_desc, param, mean_obs, sd_obs, n_obs, src)
+
+tbl_pft <- tidyr::crossing(
+  PFT = unique(ph_wide$PFT),
+  param = harvest_param_names
+) %>%
+  dplyr::left_join(frac_pft_l, by = c("PFT", "param")) %>%
+  dplyr::left_join(g_vals, by = "param") %>%
+  dplyr::left_join(ph_long, by = c("PFT", "param")) %>%
+  dplyr::mutate(
+    mean_obs = dplyr::coalesce(
+      suppressWarnings(as.numeric(.data$v_pft)),
+      suppressWarnings(as.numeric(.data$ph)),
+      suppressWarnings(as.numeric(.data$v_glob))
+    ),
+    sd_obs = NA_real_,
+    n_obs = 0L,
+    level = "pft",
+    class = NA_character_,
+    subclass = NA_character_,
+    crop_desc = NA_character_
+  ) %>%
+  dplyr::select(level, PFT, class, subclass, crop_desc, param, mean_obs, sd_obs, n_obs)
+
+tbl_global <- tibble::tibble(
+  param = harvest_param_names,
+  mean_obs = c(
+    fr_g_num("AGB_REMOVED"), fr_g_num("AGB_LITTER"),
+    fr_g_num("BGB_REMOVED"), fr_g_num("BGB_LITTER")
+  ),
+  sd_obs = NA_real_,
+  n_obs = 0L,
+  level = "global",
+  class = NA_character_,
+  subclass = NA_character_,
+  crop_desc = NA_character_,
+  PFT = NA_character_
+) %>%
+  dplyr::select(level, PFT, class, subclass, crop_desc, param, mean_obs, sd_obs, n_obs)
+
+harvest_lookup_long <- dplyr::bind_rows(
+  tbl_sub %>% dplyr::select(-src),
+  tbl_class %>% dplyr::select(-src),
+  tbl_pft,
+  tbl_global
+) %>%
+  dplyr::relocate(level, PFT, class, subclass, crop_desc, param, mean_obs, sd_obs, n_obs) %>%
+  dplyr::arrange(level, PFT, class, subclass, param)
+
+#### Write RDS and CSV
+
+write_harvest_lookup_outputs(harvest_lookup_long, out_rds, out_csv)
diff --git a/modules/data.remote/inst/ccmmf/traits/build_planting_lookup.R b/modules/data.remote/inst/ccmmf/traits/build_planting_lookup.R
new file mode 100644
index 00000000000..69936a7edcf
--- /dev/null
+++ b/modules/data.remote/inst/ccmmf/traits/build_planting_lookup.R
@@ -0,0 +1,342 @@
+# Build long-format planting trait lookup for CCMMF / SIPNET-style pool init.
+#
+# Reads TRY master_data, maps each observation's species to a LandIQ SUBCLASS_desc
+# crop group (genus lookup), converts trait values to common units, then keeps
+# only rows that join to an agricultural LandIQ code with a PFT. Aggregates to
+# four fallback levels used downstream: subclass (class+subclass), class crossed
+# with PFT, PFT-only, and global. TRY SLA traits 3115-3117 are merged into one
+# pooled SLA trait (SLA_POOLED) before aggregation.
+#
+# Writes plant_traits/planting_lookup_long.rds and planting_lookup_long.csv
+# under CCMMF_MANAGEMENT (default: .../ccmmf/management).
+#
+# Run from management repo root:
+#   Rscript scripts/traits/build_planting_lookup.R
+# Env: CCMMF_MANAGEMENT, TRY_MASTER_DATA (path to master_data.RData with object master_data).
+
+#### Load packages
+
+suppressPackageStartupMessages({
+  library(data.table)
+  library(dplyr)
+  library(readr)
+  library(tibble)
+})
+
+#### Paths and outputs (CCMMF_MANAGEMENT and TRY_MASTER_DATA override defaults)
+
+path_management <- Sys.getenv("CCMMF_MANAGEMENT", "/projectnb/dietzelab/ccmmf/management")
+master_data_path <- Sys.getenv("TRY_MASTER_DATA", "/projectnb/dietzelab/mkim/TRYDataR/master_data.RData")
+landiq_lookup_csv <- file.path(path_management, "LandIQ_cropCode_lookup_table.csv")
+plant_traits_dir <- file.path(path_management, "plant_traits")
+out_rds <- file.path(plant_traits_dir, "planting_lookup_long.rds")
+out_csv <- file.path(plant_traits_dir, "planting_lookup_long.csv")
+
+#### SLA pool: TRY IDs 3115, 3116, 3117 combined into one pseudo-trait
+
+sla_component_ids <- c(3115, 3116, 3117)
+sla_pooled_key <- "SLA_POOLED"
+sla_pooled_name <- "SLA (combined: 3115/3116/3117)"
+
+#### Helper functions
+
+# Agricultural LandIQ rows only; need PFT (same definition as build_harvest_lookup.R).
+load_landiq_mapping <- function(path = landiq_lookup_csv) {
+  d <- as.data.frame(fread(path))
+  d %>%
+    mutate(SUBCLASS = as.character(SUBCLASS)) %>%
+    filter(is_agricultural == TRUE, !is.na(PFT)) %>%
+    transmute(class = CLASS, subclass = SUBCLASS, crop_desc = SUBCLASS_desc, class_desc = CLASS_desc, PFT = PFT)
+}
+
+
+# TRY reports mixed units; this maps OrigUnitStr to the unit system we aggregate in.
+normalize_units <- function(trait_id, value, unit_str) {
+  if (is.na(value)) return(NA_real_)
+  trait_id <- as.numeric(trait_id)
+  value_num <- suppressWarnings(as.numeric(value))
+
+  if (trait_id == 14) {
+    if (is.na(unit_str) || unit_str == "") return(value_num)
+    if (grepl("mg/g|mg g-1|mg N g-1|g/100g", unit_str, ignore.case = TRUE)) return(value_num)
+    if (grepl("%", unit_str, ignore.case = TRUE)) return(value_num * 10)
+    if (grepl("g/kg", unit_str, ignore.case = TRUE)) return(value_num)
+    return(value_num)
+  }
+
+  if (trait_id %in% c(3115, 3116, 3117)) {
+    if (is.na(unit_str) || unit_str == "") return(NA_real_)
+    if (grepl("mm2/mg|mm2 mg-1|mm\\^2 mg-1", unit_str, ignore.case = TRUE)) return(value_num)
+    if (grepl("cm2/g|cm2 g-1|cm\\^2 g-1", unit_str, ignore.case = TRUE)) return(value_num * 0.1)
+    if (grepl("m2/kg|m\\^2 kg-1", unit_str, ignore.case = TRUE)) return(value_num)
+    if (grepl("g/m2|g m-2|mg/dm2", unit_str, ignore.case = TRUE)) return(NA_real_)
+    return(NA_real_)
+  }
+
+  if (trait_id %in% c(3441, 128, 3450, 3952, 3953)) {
+    if (is.na(unit_str) || unit_str == "") return(value_num)
+    if (grepl("kg", unit_str, ignore.case = TRUE)) return(value_num * 1000)
+    if (grepl("mg", unit_str, ignore.case = TRUE)) return(value_num / 1000)
+    return(value_num)
+  }
+
+  if (trait_id %in% c(1534, 2005)) {
+    if (is.na(unit_str) || unit_str == "") return(value_num)
+    if (grepl("%", unit_str, ignore.case = TRUE)) return(value_num / 100)
+    if (grepl("g/g|g g-1", unit_str, ignore.case = TRUE)) return(value_num)
+    return(value_num)
+  }
+
+  if (trait_id %in% c(146, 165, 2057, 1055)) return(value_num)
+  if (is.na(unit_str) || unit_str == "") return(value_num)
+  return(value_num)
+}
+
+
+# First word of AccSpeciesName is genus; value must match LandIQ SUBCLASS_desc text.
+genus_to_group <- c(
+  # Berries & small fruits
+  "Ribes" = "Bush berries",
+  "Rubus" = "Bush berries",
+  "Vaccinium" = "Bush berries",
+  "Fragaria" = "Strawberries",
+  # Tree fruits
+  "Malus" = "Apples",
+  "Pyrus" = "Pears",
+  "Vitis" = "Wine grapes",
+  "Persea" = "Avocados",
+  "Olea" = "Olives",
+  "Juglans" = "Walnuts",
+  "Pistacia" = "Pistachios",
+  "Punica" = "Pomegranates",
+  "Phoenix" = "Dates",
+  # Grains & row crops
+  "Zea" = "Corn, Sorghum or Sudan (grouped for RS only)",
+  "Sorghum" = "Corn, Sorghum or Sudan (grouped for RS only)",
+  "Triticum" = "Wheat",
+  "Oryza" = "Rice",
+  "Zizania" = "Wild rice",
+  "Gossypium" = "Cotton",
+  "Helianthus" = "Sunflowers",
+  # Legumes & forages
+  "Medicago" = "Alfalfa and alfalfa mixtures",
+  "Vigna" = "Beans (dry)",
+  "Glycine" = "Beans (dry)",
+  "Phaseolus" = "Beans (dry)",
+  # Vegetables
+  "Spinacia" = "Spinach",
+  "Lactuca" = "Lettuce (all types)",
+  "Brassica" = "Cole crops (mixture of 22-25)",
+  "Daucus" = "Carrots",
+  "Cucurbita" = "Melons, squash, cucumbers",
+  "Cucumis" = "Melons, squash, cucumbers",
+  "Capsicum" = "Peppers",
+  "Solanum" = "Potatoes",
+  "Ipomoea" = "Sweet potatoes",
+  "Allium" = "Onions & garlic",
+  # Miscellaneous
+  "Prunus" = "Miscellaneous deciduous",
+  "Agrostis" = "Miscellaneous grasses",
+  "Festuca" = "Miscellaneous grasses",
+  "Lolium" = "Miscellaneous grasses",
+  "Poa" = "Miscellaneous grasses",
+  "Achillea" = "Mixed pasture",
+  "Artemisia" = "Mixed pasture",
+  "Carex" = "Mixed pasture",
+  "Juncus" = "Mixed pasture",
+  "Coffea" = "Miscellaneous field",
+  "unknown" = "Miscellaneous field"
+)
+
+map_species_to_group <- function(species_vec) {
+  genus <- sub("^(\\w+).*", "\\1", species_vec)
+  group <- genus_to_group[genus]
+  group[is.na(group)] <- "NA"
+  unname(group)
+}
+
+
+# Per-level means plus species- and dataset-level diagnostics for the long lookup table.
+summarize_level <- function(df, level, level_id_col) {
+  id_sym <- rlang::sym(level_id_col)
+
+  obs <- df %>%
+    group_by(!!id_sym, TraitKey, TraitID, TraitName) %>%
+    summarise(
+      mean_obs = mean(value_std, na.rm = TRUE),
+      sd_obs   = sd(value_std, na.rm = TRUE),
+      n_obs    = sum(!is.na(value_std)),
+      n_species  = n_distinct(AccSpeciesName),
+      n_datasets = n_distinct(DatasetID),
+      .groups = "drop"
+    ) %>%
+    rename(level_id = !!id_sym)
+
+  sp <- df %>%
+    group_by(!!id_sym, TraitKey, TraitID, TraitName, AccSpeciesName) %>%
+    summarise(
+      mean_sp = mean(value_std, na.rm = TRUE),
+      n_obs_sp = sum(!is.na(value_std)),
+      .groups = "drop"
+    )
+  sp_sum <- sp %>%
+    group_by(!!id_sym, TraitKey, TraitID, TraitName) %>%
+    summarise(
+      mean_species = mean(mean_sp, na.rm = TRUE),
+      sd_species_mean = sd(mean_sp, na.rm = TRUE),
+      min_species_n_obs = suppressWarnings(min(n_obs_sp, na.rm = TRUE)),
+      median_species_n_obs = suppressWarnings(stats::median(n_obs_sp, na.rm = TRUE)),
+      max_species_n_obs = suppressWarnings(max(n_obs_sp, na.rm = TRUE)),
+      .groups = "drop"
+    ) %>%
+    rename(level_id = !!id_sym)
+
+  ds <- df %>%
+    group_by(!!id_sym, TraitKey, TraitID, TraitName, DatasetID) %>%
+    summarise(
+      mean_ds = mean(value_std, na.rm = TRUE),
+      n_obs_ds = sum(!is.na(value_std)),
+      .groups = "drop"
+    )
+  ds_sum <- ds %>%
+    group_by(!!id_sym, TraitKey, TraitID, TraitName) %>%
+    summarise(
+      mean_dataset = mean(mean_ds, na.rm = TRUE),
+      sd_dataset_mean = sd(mean_ds, na.rm = TRUE),
+      min_dataset_n_obs = suppressWarnings(min(n_obs_ds, na.rm = TRUE)),
+      median_dataset_n_obs = suppressWarnings(stats::median(n_obs_ds, na.rm = TRUE)),
+      max_dataset_n_obs = suppressWarnings(max(n_obs_ds, na.rm = TRUE)),
+      .groups = "drop"
+    ) %>%
+    rename(level_id = !!id_sym)
+
+  obs %>%
+    left_join(sp_sum, by = c("level_id", "TraitKey", "TraitID", "TraitName")) %>%
+    left_join(ds_sum, by = c("level_id", "TraitKey", "TraitID", "TraitName")) %>%
+    mutate(level = level) %>%
+    relocate(level, level_id, TraitKey, TraitID, TraitName)
+}
+
+
+#### Create output directory
+
+dir.create(plant_traits_dir, recursive = TRUE, showWarnings = FALSE)
+
+#### Load LandIQ and TRY master_data
+
+cat("Loading LandIQ mapping (is_agricultural only)...\n")
+mapping <- load_landiq_mapping(landiq_lookup_csv)
+
+cat("Loading master_data...\n")
+obj <- load(master_data_path)
+if (!("master_data" %in% obj)) {
+  stop("Expected object 'master_data' in ", master_data_path)
+}
+
+#### Normalize TRY units and map species string to LandIQ crop_desc group
+
+cat("Normalizing values + mapping species->group...\n")
+data_normalized <- master_data %>%
+  mutate(
+    value_num = suppressWarnings(as.numeric(OrigValueStr)),
+    value_std = mapply(normalize_units, TraitID, value_num, OrigUnitStr),
+    TraitKey = as.character(TraitID),
+    group = map_species_to_group(AccSpeciesName)
+  ) %>%
+  filter(!is.na(value_std))
+cat("Rows after normalization (all traits): ", nrow(data_normalized), "\n", sep = "")
+
+
+#### Restrict TRY rows to species that map into our LandIQ crop_desc list
+
+group_to_codes <- mapping %>%
+  select(crop_desc, class, subclass, PFT) %>%
+  distinct()
+
+sub_df <- data_normalized %>%
+  filter(group != "NA") %>%
+  inner_join(group_to_codes, by = c("group" = "crop_desc"))
+
+# Same joined TRY rows; we summarize them four ways (different grouping keys below).
+class_df <- sub_df
+pft_df <- sub_df
+glob_df <- sub_df %>%
+  mutate(GLOBAL = "GLOBAL")
+
+# Copy SLA component traits under a single TraitKey so SLA pools with one fallback chain.
+sub_df_sla <- sub_df %>%
+  filter(TraitID %in% sla_component_ids) %>%
+  mutate(TraitKey = sla_pooled_key, TraitID = NA_real_, TraitName = sla_pooled_name)
+class_df_sla <- class_df %>%
+  filter(TraitID %in% sla_component_ids) %>%
+  mutate(TraitKey = sla_pooled_key, TraitID = NA_real_, TraitName = sla_pooled_name)
+pft_df_sla <- pft_df %>%
+  filter(TraitID %in% sla_component_ids) %>%
+  mutate(TraitKey = sla_pooled_key, TraitID = NA_real_, TraitName = sla_pooled_name)
+glob_df_sla <- glob_df %>%
+  filter(TraitID %in% sla_component_ids) %>%
+  mutate(TraitKey = sla_pooled_key, TraitID = NA_real_, TraitName = sla_pooled_name)
+
+#### Summarize traits at each fallback level (subclass, class x PFT, PFT, global; plus SLA pooled)
+
+# Subclass key is first letter of class plus numeric subclass (matches LandIQ code layout).
+sub_df <- sub_df %>% mutate(subclass_id = paste0(class, subclass))
+sub_df_sla <- sub_df_sla %>% mutate(subclass_id = paste0(class, subclass))
+cat("Summarizing subclass level...\n")
+tbl_sub <- bind_rows(
+  summarize_level(sub_df, level = "subclass", level_id_col = "subclass_id"),
+  summarize_level(sub_df_sla, level = "subclass", level_id_col = "subclass_id")
+)
+# Class-level table is keyed by class and PFT together (one trait row per class|PFT).
+class_df <- class_df %>% mutate(class_pft = paste0(class, "|", PFT))
+class_df_sla <- class_df_sla %>% mutate(class_pft = paste0(class, "|", PFT))
+cat("Summarizing class level (by class x PFT)...\n")
+tbl_class <- bind_rows(
+  summarize_level(class_df, level = "class", level_id_col = "class_pft"),
+  summarize_level(class_df_sla, level = "class", level_id_col = "class_pft")
+)
+cat("Summarizing PFT level...\n")
+tbl_pft <- bind_rows(
+  summarize_level(pft_df, level = "pft", level_id_col = "PFT"),
+  summarize_level(pft_df_sla, level = "pft", level_id_col = "PFT")
+)
+cat("Summarizing global level...\n")
+tbl_global <- bind_rows(
+  summarize_level(glob_df, level = "global", level_id_col = "GLOBAL"),
+  summarize_level(glob_df_sla, level = "global", level_id_col = "GLOBAL")
+)
+
+#### Parse level_id and add LandIQ identity columns for the long output
+
+# summarize_level() only keeps level_id plus trait stats. Below we split that id and
+# left_join LandIQ so each row carries class, subclass, crop_desc, PFT (the same
+# names pool_calculations uses). PFT-only and global levels have no LandIQ code;
+# we set those columns to NA so bind_rows() stacks one consistent schema.
+
+tbl_sub_parsed <- tbl_sub %>%
+  mutate(class = sub("^(.)(.*)$", "\\1", level_id),
+         subclass = sub("^.(.*)$", "\\1", level_id)) %>%
+  left_join(mapping %>% distinct(class, subclass, crop_desc, PFT), by = c("class", "subclass")) %>%
+  select(-level_id)
+tbl_class_parsed <- tbl_class %>%
+  mutate(class = sub("^([^|]+)\\|(.*)$", "\\1", level_id),
+         PFT = sub("^([^|]+)\\|(.*)$", "\\2", level_id)) %>%
+  left_join(mapping %>% distinct(class, class_desc), by = "class") %>%
+  mutate(subclass = NA_character_, crop_desc = class_desc) %>%
+  select(-class_desc, -level_id)
+tbl_pft_parsed <- tbl_pft %>%
+  mutate(PFT = level_id, class = NA_character_, subclass = NA_character_, crop_desc = NA_character_) %>%
+  select(-level_id)
+tbl_global_parsed <- tbl_global %>%
+  mutate(class = NA_character_, subclass = NA_character_, crop_desc = NA_character_, PFT = NA_character_) %>%
+  select(-level_id)
+
+planting_lookup_long <- bind_rows(tbl_sub_parsed, tbl_class_parsed, tbl_pft_parsed, tbl_global_parsed) %>%
+  relocate(level, PFT, class, subclass, crop_desc) %>%
+  arrange(level, PFT, class, subclass, TraitID)
+
+
+#### Write RDS and CSV
+
+saveRDS(planting_lookup_long, out_rds)
+write_csv(planting_lookup_long, out_csv)
\ No newline at end of file
diff --git a/modules/data.remote/inst/ccmmf/traits/lai_from_mslsp.R b/modules/data.remote/inst/ccmmf/traits/lai_from_mslsp.R
new file mode 100644
index 00000000000..29021c35acd
--- /dev/null
+++ b/modules/data.remote/inst/ccmmf/traits/lai_from_mslsp.R
@@ -0,0 +1,112 @@
+# Leaf area index (LAI) from MSLSP EVImax and EVIamp for CCMMF pool initialization.
+#
+# Mourad et al. (2020): LAI = (max(0, a*sqrt(k*EVI) - b))^2 with fixed a, b. The
+# paper uses only the positive part of (a*sqrt(k*EVI) - b) before squaring; EVI is
+# floored at 0. k reflects phenological timing (0.15 ~ early-season annuals,
+# 0.50 ~ leaf-on on perennials). Row and rice use EVIamp (strong bare season;
+# amplitude tracks the crop cycle). Hay and mature woody use EVImax (often green
+# year-round; amplitude can be small while peak EVI still reflects canopy). Woody
+# with LandIQ CLASS YP (young perennial) uses EVIamp.
+#
+# Why only CLASS, not SUBCLASS: the only LandIQ branch in this file is woody YP
+# vs other woody. Row, rice, and hay do not use CLASS. We do not vary LAI by
+# subclass (e.g. T19 vs T4) yet; that would belong in a richer rule table later.
+# Pool functions pass CLASS from your data, or set it from CLASS_SUBCLASS using
+# lk$mapping when you only supply a code string.
+#
+# Main entry: compute_lai_from_mslsp(mslsp_EVImax, mslsp_EVIamp, pft, class,
+#   diagnostics=FALSE). Scalar LAI, or diagnostics=TRUE returns a list (LAI, lai_*).
+#
+# Downstream: sourced by pool_calculations_from_lookup.R for initialize_planting() (MSLSP path).
+# Run alone: source this file from R (requires dplyr); no env vars for this module.
+# Style: docs/SCRIPT_REFACTOR_PROMPT.md in ccmmf-phenology (ASCII, simple sections).
+
+#### Load packages
+
+suppressPackageStartupMessages({
+  library(dplyr)
+})
+
+lai_mourad_a <- 2.92
+lai_mourad_b <- 0.43
+lai_default_min <- 0
+lai_default_max <- Inf
+
+compute_lai_from_mslsp <- function(mslsp_EVImax, mslsp_EVIamp,
+                                   pft, class = NA_character_,
+                                   diagnostics = FALSE) {
+  if (!diagnostics) {
+    na_out <- NA_real_
+  } else {
+    na_out <- list(
+      LAI = NA_real_, lai_rule_id = NA_character_, lai_evi_field_used = NA_character_,
+      lai_evi_value_used = NA_real_, lai_k = NA_real_, lai_a = NA_real_, lai_b = NA_real_,
+      lai_min = NA_real_, lai_max = NA_real_
+    )
+  }
+
+  if (length(pft) != 1L || is.na(pft)) return(na_out)
+  pft_lc <- tolower(trimws(as.character(pft)))
+  if (!nzchar(pft_lc)) return(na_out)
+
+  if (length(class) != 1L || is.na(class)) {
+    class_uc <- NA_character_
+  } else {
+    class_uc <- toupper(trimws(as.character(class)))
+    if (!nzchar(class_uc)) class_uc <- NA_character_
+  }
+
+  rules <- tibble(pft_lc = pft_lc, class_uc = class_uc) %>%
+    mutate(
+      rule_id = case_when(
+        pft_lc == "row" ~ "row_amp",
+        pft_lc == "rice" ~ "rice_amp",
+        pft_lc == "hay" ~ "hay_max",
+        pft_lc == "woody" & !is.na(class_uc) & class_uc == "YP" ~ "woody_yp_amp",
+        pft_lc == "woody" ~ "woody_max",
+        TRUE ~ NA_character_
+      ),
+      k = case_when(
+        pft_lc %in% c("row", "rice") ~ 0.15,
+        pft_lc %in% c("hay", "woody") ~ 0.50,
+        TRUE ~ NA_real_
+      ),
+      evi_field = case_when(
+        pft_lc %in% c("row", "rice") ~ "EVIamp",
+        pft_lc == "hay" ~ "EVImax",
+        pft_lc == "woody" & !is.na(class_uc) & class_uc == "YP" ~ "EVIamp",
+        pft_lc == "woody" ~ "EVImax",
+        TRUE ~ NA_character_
+      )
+    )
+
+  if (is.na(rules$rule_id[1])) return(na_out)
+
+  evi_used <- if (rules$evi_field[1] == "EVIamp") mslsp_EVIamp else mslsp_EVImax
+  evi_num <- as.numeric(evi_used)
+  k <- rules$k[1]
+  a <- lai_mourad_a
+  b <- lai_mourad_b
+  lai <- NA_real_
+  if (!is.na(evi_num) && !is.na(k)) {
+    evi_num <- pmax(0, evi_num)
+    term <- a * sqrt(k * evi_num) - b
+    lai <- pmax(0, term)^2
+    lai <- pmax(lai_default_min, pmin(lai_default_max, lai))
+  }
+
+  if (!diagnostics) {
+    return(lai)
+  }
+  list(
+    LAI = lai,
+    lai_rule_id = rules$rule_id[1],
+    lai_evi_field_used = rules$evi_field[1],
+    lai_evi_value_used = as.numeric(evi_used),
+    lai_k = k,
+    lai_a = a,
+    lai_b = b,
+    lai_min = lai_default_min,
+    lai_max = lai_default_max
+  )
+}
diff --git a/modules/data.remote/inst/ccmmf/traits/pool_calculations_from_lookup.R b/modules/data.remote/inst/ccmmf/traits/pool_calculations_from_lookup.R
new file mode 100644
index 00000000000..403abf060f2
--- /dev/null
+++ b/modules/data.remote/inst/ccmmf/traits/pool_calculations_from_lookup.R
@@ -0,0 +1,503 @@
+# Pool calculations from pre-built TRY and harvest lookup tables for SIPNET
+# planting and harvest rows. Maps LandIQ crop code and PFT to C and N pools at
+# planting (LAI from MSLSP EVI via lai_from_mslsp.R) and to removal
+# fractions at harvest. Trait lookup order: subclass > class > PFT > global.
+#
+# Main inputs: planting_lookup_long.rds, harvest_lookup_long.rds,
+# LandIQ_cropCode_lookup_table.csv
+#
+# Main outputs: Tibbles from initialize_planting() and initialize_harvest_from_lookup() (no writes here).
+# Used by make_events_statewide.R for statewide planting events.
+
+#### Load packages
+
+suppressPackageStartupMessages({
+  library(data.table)
+  library(dplyr)
+  library(readr)
+  library(tibble)
+})
+
+#### Paths and configuration
+
+path_management     <- Sys.getenv("CCMMF_MANAGEMENT", "/projectnb/dietzelab/ccmmf/management")
+plant_traits_dir    <- file.path(path_management, "plant_traits")
+planting_lookup_rds <- file.path(plant_traits_dir, "planting_lookup_long.rds")
+harvest_lookup_rds  <- file.path(plant_traits_dir, "harvest_lookup_long.rds")
+landiq_lookup_csv   <- file.path(path_management, "LandIQ_cropCode_lookup_table.csv")
+
+source(file.path(path_management, "scripts/traits/lai_from_mslsp.R"))
+
+sla_pooled_key <- "SLA_POOLED"
+
+# Infix "default if missing": x %||% y returns y when x is NULL or length-0, else x
+# (same idea as ?? in some languages). We use it because trait lookup records do not
+# always include every optional field (e.g. n_obs, sd_obs); for diagnostics we still
+# want stable columns, so we fall back to NA_* instead of NULL or branching everywhere.
+`%||%` <- function(x, y) if (is.null(x) || length(x) == 0) y else x
+
+#### LandIQ crop table (agricultural rows, key = CLASS pasted with SUBCLASS)
+
+load_landiq_mapping <- function(path = landiq_lookup_csv) {
+  d <- as.data.frame(fread(path))
+  d %>%
+    filter(is_agricultural == TRUE) %>%
+    mutate(CLASS = as.character(CLASS),
+           SUBCLASS = as.character(SUBCLASS),
+           key = paste0(CLASS, SUBCLASS)) %>%
+    relocate(CLASS, SUBCLASS, CLASS_desc, SUBCLASS_desc, PFT, key)
+}
+
+#### Named-vector indexes from RDS long tables (fast lookup by string key)
+#
+# load_trait_lookup() splits the long table by level (subclass, class, pft, global).
+# Each call below gets one of those pieces. Keys are built in the same order
+# get_trait_record tries them: subclass, class, pft, global.
+
+make_trait_index <- function(lookup_df) {
+  lookup_df <- as.data.frame(lookup_df)
+  cols <- c(
+    "mean_obs", "sd_obs", "n_obs", "n_species", "n_datasets",
+    "mean_species", "sd_species_mean",
+    "mean_dataset", "sd_dataset_mean",
+    "replicates_median", "n_replicates_nonNA", "errorRisk_max",
+    "TraitName"
+  )
+  # Drop any name not in the table (the planting RDS does not have every column listed above).
+  cols <- intersect(cols, names(lookup_df))
+  # No data rows: return empty indexes. Otherwise the tests below can guess wrong on empty vectors.
+  if (nrow(lookup_df) == 0L) {
+    idx <- lapply(cols, function(cc) stats::setNames(lookup_df[[cc]], character(0)))
+    names(idx) <- cols
+    return(idx)
+  }
+  if ("subclass" %in% names(lookup_df) && !all(is.na(lookup_df$subclass))) {
+    # Subclass: class+subclass|TraitKey (harvest includes PFT in this key; traits do not)
+    key <- paste0(lookup_df$class, lookup_df$subclass, "|", lookup_df$TraitKey)
+  } else if ("class" %in% names(lookup_df) && !all(is.na(lookup_df$class))) {
+    # Class-level rows: class|PFT|TraitKey or class|TraitKey
+    has_pft <- "PFT" %in% names(lookup_df)
+    use_pft <- if (has_pft) !is.na(lookup_df$PFT) & nzchar(trimws(as.character(lookup_df$PFT))) else rep(FALSE, nrow(lookup_df))
+    lookup_key <- ifelse(use_pft, paste0(lookup_df$class, "|", lookup_df$PFT), dplyr::coalesce(lookup_df$class, "GLOBAL"))
+    key <- paste0(lookup_key, "|", lookup_df$TraitKey)
+  } else if ("PFT" %in% names(lookup_df) && "class" %in% names(lookup_df) && all(is.na(lookup_df$class)) && !all(is.na(lookup_df$PFT))) {
+    # PFT-level rows: PFT|TraitKey
+    key <- paste0(lookup_df$PFT, "|", lookup_df$TraitKey)
+  } else if ("PFT" %in% names(lookup_df) && all(is.na(lookup_df$PFT))) {
+    # Global rows: GLOBAL|TraitKey
+    key <- paste0("GLOBAL|", lookup_df$TraitKey)
+  } else {
+    stop("make_trait_index: unrecognized row layout (check level column in RDS)", call. = FALSE)
+  }
+  idx <- lapply(cols, function(cc) stats::setNames(lookup_df[[cc]], key))
+  names(idx) <- cols
+  idx
+}
+
+make_harvest_index <- function(lookup_df) {
+  lookup_df <- as.data.frame(lookup_df)
+  cols <- c("mean_obs", "sd_obs", "n_obs")
+  cols <- intersect(cols, names(lookup_df))
+  # No data rows: return empty indexes (same reason as make_trait_index).
+  if (nrow(lookup_df) == 0L) {
+    idx <- lapply(cols, function(cc) stats::setNames(lookup_df[[cc]], character(0)))
+    names(idx) <- cols
+    return(idx)
+  }
+  if ("subclass" %in% names(lookup_df) && !all(is.na(lookup_df$subclass))) {
+    key <- paste0(lookup_df$class, lookup_df$subclass, "|", lookup_df$PFT, "|", lookup_df$param)
+  } else if ("class" %in% names(lookup_df) && !all(is.na(lookup_df$class))) {
+    key <- paste0(lookup_df$class, "|", lookup_df$PFT, "|", lookup_df$param)
+  } else if ("PFT" %in% names(lookup_df) && "class" %in% names(lookup_df) && all(is.na(lookup_df$class)) && !all(is.na(lookup_df$PFT))) {
+    key <- paste0(lookup_df$PFT, "|", lookup_df$param)
+  } else if ("PFT" %in% names(lookup_df) && all(is.na(lookup_df$PFT))) {
+    key <- paste0("GLOBAL|", lookup_df$param)
+  } else {
+    stop("make_harvest_index: unrecognized row layout (check level column in RDS)", call. = FALSE)
+  }
+  idx <- lapply(cols, function(cc) stats::setNames(lookup_df[[cc]], key))
+  names(idx) <- cols
+  idx
+}
+
+# One row from an index built above; NULL if key missing or mean_obs is NA.
+lookup_index_fetch <- function(idx, key) {
+  if (is.null(key)) return(NULL)
+  v <- idx$mean_obs[key]
+  if (length(v) == 0 || is.na(v[[1]])) return(NULL)
+  out <- list()
+  for (nm in names(idx)) out[[nm]] <- idx[[nm]][key][[1]]
+  out
+}
+
+#### Load planting and harvest RDS; build all indexes and LandIQ mapping
+
+load_trait_lookup <- function(path = planting_lookup_rds,
+                              harvest_path = harvest_lookup_rds,
+                              landiq_path = landiq_lookup_csv) {
+  df <- readRDS(path)
+  harvest_df <- readRDS(harvest_path)
+  list(
+    lookup = df,
+    idx_subclass = make_trait_index(df %>% filter(.data$level == "subclass")),
+    idx_class    = make_trait_index(df %>% filter(.data$level == "class")),
+    idx_pft      = make_trait_index(df %>% filter(.data$level == "pft")),
+    idx_global   = make_trait_index(df %>% filter(.data$level == "global")),
+    harvest_lookup = harvest_df,
+    idx_harvest_subclass = make_harvest_index(harvest_df %>% filter(.data$level == "subclass")),
+    idx_harvest_class    = make_harvest_index(harvest_df %>% filter(.data$level == "class")),
+    idx_harvest_pft      = make_harvest_index(harvest_df %>% filter(.data$level == "pft")),
+    idx_harvest_global   = make_harvest_index(harvest_df %>% filter(.data$level == "global")),
+    mapping = load_landiq_mapping(landiq_path)
+  )
+}
+
+#### LandIQ code or CLASS+SUBCLASS to crop fields used in keys
+
+# Returns class, subclass, crop_desc, class_desc, pft from LandIQ code (e.g. T19).
+get_group_class_from_code <- function(code, mapping_df) {
+  row <- mapping_df %>% filter(.data$key == !!code)
+  if (nrow(row) == 0) return(list(class = NA_character_, subclass = NA_character_, crop_desc = NA_character_, class_desc = NA_character_, pft = NA_character_))
+  pft <- if ("PFT" %in% names(row)) row$PFT[1] else NA_character_
+  list(class = row$CLASS[1], subclass = row$SUBCLASS[1], crop_desc = row$SUBCLASS_desc[1], class_desc = row$CLASS_desc[1], pft = pft)
+}
+
+# Same structure as get_group_class_from_code but look up by class and subclass.
+get_group_class_from_class_subclass <- function(class, subclass, mapping_df) {
+  if (is.null(mapping_df)) return(list(class = NA_character_, subclass = NA_character_, crop_desc = NA_character_, class_desc = NA_character_, pft = NA_character_))
+  class <- trimws(as.character(class))[1]
+  subclass <- as.character(subclass)[1]
+  if (is.na(class) || is.na(subclass) || !nzchar(class) || !nzchar(subclass)) return(list(class = NA_character_, subclass = NA_character_, crop_desc = NA_character_, class_desc = NA_character_, pft = NA_character_))
+  row <- mapping_df %>% filter(.data$CLASS == !!class, .data$SUBCLASS == !!subclass)
+  if (nrow(row) == 0) return(list(class = class, subclass = subclass, crop_desc = NA_character_, class_desc = NA_character_, pft = NA_character_))
+  pft <- if ("PFT" %in% names(row)) row$PFT[1] else NA_character_
+  list(class = row$CLASS[1], subclass = row$SUBCLASS[1], crop_desc = row$SUBCLASS_desc[1], class_desc = row$CLASS_desc[1], pft = pft)
+}
+
+as_trait_key <- function(trait_id_or_key) {
+  if (is.character(trait_id_or_key)) return(trait_id_or_key)
+  as.character(as.numeric(trait_id_or_key))
+}
+
+#### Trait and harvest values with subclass then class then pft then global fallback
+
+get_trait_record <- function(lk, subclass, class, trait_id_or_key, pft = NULL) {
+  tkey <- as_trait_key(trait_id_or_key)
+  k_sub <- paste0(class, subclass, "|", tkey)
+  if (is.null(pft) && !is.null(lk$mapping) && "PFT" %in% names(lk$mapping)) {
+    row <- lk$mapping %>% filter(.data$CLASS == !!class, .data$SUBCLASS == !!subclass)
+    pft <- if (nrow(row) > 0) row$PFT[1] else NA_character_
+  }
+  k_cls <- if (!is.na(pft) && nzchar(pft)) paste0(class, "|", pft, "|", tkey) else paste0(class, "|", tkey)
+  k_pft <- if (!is.na(pft) && nzchar(pft)) paste0(pft, "|", tkey) else NULL
+  k_glb <- paste0("GLOBAL|", tkey)
+
+  r <- lookup_index_fetch(lk$idx_subclass, k_sub)
+  if (!is.null(r)) return(c(r, list(value = r$mean_obs, src = "subclass")))
+  r <- lookup_index_fetch(lk$idx_class, k_cls)
+  if (!is.null(r)) return(c(r, list(value = r$mean_obs, src = "class")))
+  if (!is.null(k_pft) && !is.null(lk$idx_pft)) {
+    r <- lookup_index_fetch(lk$idx_pft, k_pft)
+    if (!is.null(r)) return(c(r, list(value = r$mean_obs, src = "pft")))
+  }
+  r <- lookup_index_fetch(lk$idx_global, k_glb)
+  if (!is.null(r)) return(c(r, list(value = r$mean_obs, src = "global")))
+  list(value = NA_real_, src = "none")
+}
+
+# Harvest fallback: subclass -> class -> pft -> global (same as trait lookup).
+# lookup_pft: row/rice/hay/woody/woody_destructive (maps from parcel PFT + destructive).
+get_harvest_param <- function(lk, subclass, class, lookup_pft, param) {
+  k_sub <- paste0(class, subclass, "|", lookup_pft, "|", param)
+  k_cls <- paste0(class, "|", lookup_pft, "|", param)
+  k_pft <- paste0(lookup_pft, "|", param)
+  k_glb <- paste0("GLOBAL|", param)
+
+  r <- lookup_index_fetch(lk$idx_harvest_subclass, k_sub)
+  if (!is.null(r)) return(c(r, list(value = r$mean_obs, src = "subclass")))
+  r <- lookup_index_fetch(lk$idx_harvest_class, k_cls)
+  if (!is.null(r)) return(c(r, list(value = r$mean_obs, src = "class")))
+  r <- lookup_index_fetch(lk$idx_harvest_pft, k_pft)
+  if (!is.null(r)) return(c(r, list(value = r$mean_obs, src = "pft")))
+  r <- lookup_index_fetch(lk$idx_harvest_global, k_glb)
+  if (!is.null(r)) return(c(r, list(value = r$mean_obs, src = "global")))
+  list(value = NA_real_, src = "none")
+}
+
+derive_CN_coarse <- function(lk, subclass, class) {
+  CN_root  <- get_trait_record(lk, subclass, class, 1055)$value
+  CN_fine  <- get_trait_record(lk, subclass, class, 2057)$value
+  f_fine   <- get_trait_record(lk, subclass, class, 2005)$value
+  f_coarse <- get_trait_record(lk, subclass, class, 1534)$value
+
+  if (all(!is.na(c(CN_root, CN_fine, f_fine, f_coarse)))) {
+    f_root <- f_fine + f_coarse
+    if (!is.na(f_root) && f_root > 0) {
+      f_fine_root <- f_fine / f_root
+      f_coarse_root <- f_coarse / f_root
+      denom <- (1 / CN_root) - (f_fine_root / CN_fine)
+      if (!is.na(denom) && denom > 0) {
+        CN_coarse <- f_coarse_root / denom
+        if (is.finite(CN_coarse) && CN_coarse > 0) return(CN_coarse)
+      }
+    }
+  }
+  if (!is.na(CN_root)) return(CN_root)
+  if (!is.na(CN_fine)) return(CN_fine)
+  CN_stem <- get_trait_record(lk, subclass, class, 165)$value
+  if (!is.na(CN_stem)) return(CN_stem)
+  CN_leaf <- get_trait_record(lk, subclass, class, 146)$value
+  if (!is.na(CN_leaf)) return(CN_leaf)
+  NA_real_
+}
+
+#### Planting: core (LandIQ code plus numeric LAI to C and N pools)
+
+planting_pools_from_lookup <- function(ID, DATE, code, LAI, PFT, lk,
+                                       diagnostics = FALSE) {
+  gc <- get_group_class_from_code(code, lk$mapping)
+  subclass <- gc$subclass
+  class <- gc$class
+  crop_desc <- gc$crop_desc
+  class_desc <- gc$class_desc
+  pft <- gc$pft
+
+  sla_rec <- get_trait_record(lk, subclass, class, sla_pooled_key, pft = pft)
+  SLA <- sla_rec$value
+  if (is.na(SLA) || SLA <= 0) {
+    out <- tibble(
+      LOC = ID, DATE = DATE, CLASS_SUBCLASS = code, class = class, subclass = subclass, crop_desc = crop_desc, CLASS_DESC = class_desc,
+      PFT = PFT, LAI = LAI,
+      C_LEAF = NA_real_, C_STEM = NA_real_, C_FINEROOT = NA_real_, C_COARSEROOT = NA_real_,
+      N_LEAF = NA_real_, N_STEM = NA_real_, N_FINEROOT = NA_real_, N_COARSEROOT = NA_real_,
+      ENSEMBLE_SIZE = 1L
+    )
+    if (diagnostics) {
+      out$sla_src <- sla_rec$src
+      out$sla_n_obs <- sla_rec$n_obs %||% NA_integer_
+      out$sla_sd_obs <- sla_rec$sd_obs %||% NA_real_
+    }
+    return(out)
+  }
+
+  M_leaf_kgm2 <- LAI / SLA
+
+  r3441 <- get_trait_record(lk, subclass, class, 3441, pft = pft)
+  r128  <- get_trait_record(lk, subclass, class, 128, pft = pft)
+  r1534 <- get_trait_record(lk, subclass, class, 1534, pft = pft)
+  r2005 <- get_trait_record(lk, subclass, class, 2005, pft = pft)
+
+  M_leaf_gplant <- r3441$value
+  M_stem_gplant <- r128$value
+  f_coarse <- r1534$value
+  f_fine   <- r2005$value
+
+  f_root <- NA_real_
+  if (!is.na(f_coarse) || !is.na(f_fine)) f_root <- dplyr::coalesce(f_coarse, 0) + dplyr::coalesce(f_fine, 0)
+
+  F_stem <- F_fineRoot <- F_coarseRoot <- NA_real_
+  if (!any(is.na(c(M_leaf_gplant, M_stem_gplant, f_root))) && f_root > 0 && f_root < 1) {
+    M_plant_gplant <- (M_leaf_gplant + M_stem_gplant) / (1 - f_root)
+    M_fine_gplant  <- if (!is.na(f_fine))   f_fine   * M_plant_gplant else NA_real_
+    M_coarse_gplant<- if (!is.na(f_coarse)) f_coarse * M_plant_gplant else NA_real_
+    F_stem <- M_stem_gplant / M_leaf_gplant
+    F_fineRoot <- if (!is.na(M_fine_gplant)) M_fine_gplant / M_leaf_gplant else NA_real_
+    F_coarseRoot <- if (!is.na(M_coarse_gplant)) M_coarse_gplant / M_leaf_gplant else NA_real_
+  } else if (!any(is.na(c(M_leaf_gplant, M_stem_gplant)))) {
+    F_stem <- M_stem_gplant / M_leaf_gplant
+  }
+
+  M_stem_kgm2   <- if (!is.na(F_stem)       && !is.na(M_leaf_kgm2)) F_stem       * M_leaf_kgm2 else NA_real_
+  M_fine_kgm2   <- if (!is.na(F_fineRoot)   && !is.na(M_leaf_kgm2)) F_fineRoot   * M_leaf_kgm2 else NA_real_
+  M_coarse_kgm2 <- if (!is.na(F_coarseRoot) && !is.na(M_leaf_kgm2)) F_coarseRoot * M_leaf_kgm2 else NA_real_
+
+  C_leaf <- M_leaf_kgm2 * 0.47
+  C_stem <- M_stem_kgm2 * 0.47
+  C_fineroot <- M_fine_kgm2 * 0.47
+  C_coarseroot <- M_coarse_kgm2 * 0.50
+
+  r14   <- get_trait_record(lk, subclass, class, 14, pft = pft)
+  r146  <- get_trait_record(lk, subclass, class, 146, pft = pft)
+  r165  <- get_trait_record(lk, subclass, class, 165, pft = pft)
+  r1055 <- get_trait_record(lk, subclass, class, 1055, pft = pft)
+  r2057 <- get_trait_record(lk, subclass, class, 2057, pft = pft)
+
+  Nleaf_mass_mgg <- r14$value
+  CN_leaf <- r146$value
+  CN_stem <- r165$value
+  CN_root <- r1055$value
+  CN_fine <- r2057$value
+
+  if (!is.na(Nleaf_mass_mgg) && !is.na(M_leaf_kgm2)) {
+    N_leaf <- M_leaf_kgm2 * (Nleaf_mass_mgg * 1e-6)
+  } else if (!is.na(CN_leaf) && !is.na(C_leaf)) {
+    N_leaf <- C_leaf / CN_leaf
+  } else {
+    N_leaf <- NA_real_
+  }
+
+  if (!is.na(C_stem) && !is.na(CN_stem)) {
+    N_stem <- C_stem / CN_stem
+  } else if (!is.na(C_stem) && !is.na(CN_root)) {
+    N_stem <- C_stem / CN_root
+  } else if (!is.na(C_stem) && !is.na(CN_leaf)) {
+    N_stem <- C_stem / CN_leaf
+  } else {
+    N_stem <- NA_real_
+  }
+
+  CN_fine_use <- CN_fine
+  if (is.na(CN_fine_use)) CN_fine_use <- CN_root
+  if (is.na(CN_fine_use)) CN_fine_use <- CN_stem
+  if (is.na(CN_fine_use)) CN_fine_use <- CN_leaf
+  N_fineroot <- if (!is.na(C_fineroot) && !is.na(CN_fine_use)) C_fineroot / CN_fine_use else NA_real_
+
+  CN_coarse <- derive_CN_coarse(lk, subclass, class)
+  if (is.na(CN_coarse)) {
+    if (!is.na(CN_root)) CN_coarse <- CN_root
+    else if (!is.na(CN_stem)) CN_coarse <- CN_stem
+    else if (!is.na(CN_leaf)) CN_coarse <- CN_leaf
+  }
+  N_coarseroot <- if (!is.na(C_coarseroot) && !is.na(CN_coarse)) C_coarseroot / CN_coarse else NA_real_
+
+  out <- tibble(
+    LOC = ID, DATE = DATE, CLASS_SUBCLASS = code, class = class, subclass = subclass, crop_desc = crop_desc, CLASS_DESC = class_desc,
+    PFT = PFT, LAI = LAI,
+    C_LEAF = C_leaf, C_STEM = C_stem, C_FINEROOT = C_fineroot, C_COARSEROOT = C_coarseroot,
+    N_LEAF = N_leaf, N_STEM = N_stem, N_FINEROOT = N_fineroot, N_COARSEROOT = N_coarseroot,
+    ENSEMBLE_SIZE = 1L
+  )
+
+  if (diagnostics) {
+    out$sla_src <- sla_rec$src
+    out$sla_n_obs <- sla_rec$n_obs %||% NA_integer_
+    out$sla_sd_obs <- sla_rec$sd_obs %||% NA_real_
+    out$src_14   <- r14$src
+    out$src_3441 <- r3441$src
+    out$src_128  <- r128$src
+    out$src_2005 <- r2005$src
+    out$src_1534 <- r1534$src
+    out$src_1055 <- r1055$src
+    out$src_2057 <- r2057$src
+    out$src_146  <- r146$src
+    out$src_165  <- r165$src
+    out$used_class_any  <- any(c(out$src_14, out$src_3441, out$src_128, out$src_2005, out$src_1534,
+                                 out$src_1055, out$src_2057, out$src_146, out$src_165) == "class")
+    out$used_pft_any    <- any(c(out$src_14, out$src_3441, out$src_128, out$src_2005, out$src_1534,
+                                 out$src_1055, out$src_2057, out$src_146, out$src_165) == "pft")
+    out$used_global_any <- any(c(out$src_14, out$src_3441, out$src_128, out$src_2005, out$src_1534,
+                                 out$src_1055, out$src_2057, out$src_146, out$src_165) == "global")
+  }
+  out
+}
+
+#### Planting: initialize_planting (fixed LAI or MSLSP EVI)
+
+# If LAI is a finite number, use it and ignore mslsp_EVImax and mslsp_EVIamp.
+# Else require both EVI fields and compute LAI with compute_lai_from_mslsp().
+# LandIQ key: if class and subclass are both non-empty, code is paste0(class, subclass).
+# Otherwise code must be set. Non-empty class overrides the class used in
+# compute_lai_from_mslsp when the caller passed only a code string (optional CLASS column).
+
+initialize_planting <- function(
+    ID, DATE, PFT, lk,
+    code = NULL,
+    class = NA_character_,
+    subclass = NA_character_,
+    LAI = NA_real_,
+    mslsp_EVImax = NULL,
+    mslsp_EVIamp = NULL,
+    diagnostics = FALSE) {
+
+  cls <- trimws(as.character(class)[1])
+  sub <- as.character(subclass)[1]
+  code_in <- if (is.null(code)) "" else trimws(as.character(code)[1])
+
+  if (!is.na(cls) && nzchar(cls) && !is.na(sub) && nzchar(sub)) {
+    code_chr <- paste0(cls, sub)
+  } else if (nzchar(code_in)) {
+    code_chr <- code_in
+  } else {
+    stop("initialize_planting: provide LandIQ code or both class and subclass.")
+  }
+
+  lai_vec <- suppressWarnings(as.numeric(LAI))
+  use_explicit_lai <- length(lai_vec) > 0L && is.finite(lai_vec[[1]]) && !is.na(lai_vec[[1]])
+
+  if (use_explicit_lai) {
+    return(planting_pools_from_lookup(
+      ID = ID, DATE = DATE, code = code_chr, LAI = lai_vec[[1]], PFT = PFT, lk = lk, diagnostics = diagnostics
+    ))
+  }
+
+  if (is.null(mslsp_EVImax) || is.null(mslsp_EVIamp)) {
+    stop("initialize_planting: provide finite LAI, or both mslsp_EVImax and mslsp_EVIamp.")
+  }
+  mx <- suppressWarnings(as.numeric(mslsp_EVImax)[1])
+  ma <- suppressWarnings(as.numeric(mslsp_EVIamp)[1])
+  if (is.na(mx) || is.na(ma)) {
+    stop("initialize_planting: mslsp_EVImax and mslsp_EVIamp must be non-NA when LAI is not given.")
+  }
+
+  class_for_lai <- cls
+  if (is.na(class_for_lai) || !nzchar(class_for_lai)) {
+    class_for_lai <- get_group_class_from_code(code_chr, lk$mapping)$class
+  }
+
+  lai_diag <- compute_lai_from_mslsp(
+    mslsp_EVImax = mx,
+    mslsp_EVIamp = ma,
+    pft = PFT,
+    class = class_for_lai,
+    diagnostics = TRUE
+  )
+
+  out <- planting_pools_from_lookup(
+    ID = ID, DATE = DATE, code = code_chr, LAI = lai_diag$LAI, PFT = PFT, lk = lk, diagnostics = diagnostics
+  )
+  if (diagnostics) {
+    out$lai_rule_id <- lai_diag$lai_rule_id
+    out$lai_evi_field_used <- lai_diag$lai_evi_field_used
+    out$lai_evi_value_used <- lai_diag$lai_evi_value_used
+    out$lai_k <- lai_diag$lai_k
+    out$lai_a <- lai_diag$lai_a
+    out$lai_b <- lai_diag$lai_b
+    out$lai_min <- lai_diag$lai_min
+    out$lai_max <- lai_diag$lai_max
+  }
+  out
+}
+
+#### Harvest: removal fractions from lookup
+
+initialize_harvest_from_lookup <- function(ID, DATE, code, PFT, lk, destructive = FALSE) {
+  gc <- get_group_class_from_code(code, lk$mapping)
+  subclass <- gc$subclass
+  class <- gc$class
+  crop_desc <- gc$crop_desc
+  class_desc <- gc$class_desc
+
+  # Map parcel PFT to lookup PFT: rice/row->row, hay->hay, woody->woody or woody_destructive
+  lookup_pft <- dplyr::case_when(
+    PFT %in% c("rice", "row")    ~ "row",
+    PFT == "hay"                 ~ "hay",
+    PFT == "woody" & destructive ~ "woody_destructive",
+    PFT == "woody"               ~ "woody",
+    TRUE                         ~ "skip"
+  )
+  if (lookup_pft == "skip") return(NULL)
+
+  r_agb_rem <- get_harvest_param(lk, subclass, class, lookup_pft, "AGB_REMOVED")
+  r_agb_lit <- get_harvest_param(lk, subclass, class, lookup_pft, "AGB_LITTER")
+  r_bgb_rem <- get_harvest_param(lk, subclass, class, lookup_pft, "BGB_REMOVED")
+  r_bgb_lit <- get_harvest_param(lk, subclass, class, lookup_pft, "BGB_LITTER")
+
+  tibble(
+    LOC = ID, DATE = DATE, CLASS_SUBCLASS = code, class = class, subclass = subclass, crop_desc = crop_desc, CLASS_DESC = class_desc,
+    PFT = PFT,
+    AGB_REMOVED = r_agb_rem$value, AGB_LITTER = r_agb_lit$value,
+    BGB_REMOVED = r_bgb_rem$value, BGB_LITTER = r_bgb_lit$value,
+    ENSEMBLE_SIZE = 1L
+  )
+}
+