Voteshield · Tommi-Tsuruga · Jun 15, 2026 · Jun 15, 2026
diff --git a/reggie/configs/data/maine.yaml b/reggie/configs/data/maine.yaml
@@ -253,6 +253,35 @@ ordered_columns:
   - KCB
   - ACF
   - DT CHG
+
+# In June 2026 Maine renamed some columns; this maps each new column name (key) to the legacy name (value)
+rename_columns:
+  REG_MUNI: REG TOWN
+  VOTER_REC_NUM: VOTER ID
+  LAST_NAME: LAST NAME
+  FIRST_NAME: FIRST NAME
+  MIDDLE_NAME: MIDDLE NAME
+  SUFFIX: SUFF
+  RES_STR_NUM: ST NUM
+  RES_SUF_A: ST NUM A
+  RES_SUF_B: ST NUM B
+  RES_STR_NAME: ST NAME1
+  RES_UNIT_TYPE: ST NAME2
+  RES_UNIT_NUM: UNIT
+  RES_MUNI: CITY # same 
+  RES_ST: STATE
+  RES_ZIP5: ZIP
+  RES_ZIP4: ZIP4
+  MAIL_STR_NUM: MAIL ST NUM
+  MAIL_SUF_A: MAIL ST NUM A
+  MAIL_SUF_B: MAIL ST NUM B
+  MAIL_STR_NAME: MAIL ST NAME1
+  MAIL_UNIT_NUM: MAIL UNIT
+  MAIL_MUNI: MAIL CITY
+  MAIL_ST: MAIL STATE
+  VOTER_STATUS: STATUS
+  W-P: WP
+  REG_DATE: DT ACCEPT # DT_EFFECT is no longer provided, and there appears to be no status change date either
 
 county_codes:
   Androscoggin: 01and

diff --git a/reggie/ingestion/preprocessor/maine_preprocessor.py b/reggie/ingestion/preprocessor/maine_preprocessor.py
@@ -104,11 +104,14 @@ def keep_most_recent_record(voters_df, cancelled_df):
         cancelled_df = pd.DataFrame()
         hist_df = pd.DataFrame()
         for file in new_files:
-            if "voter.txt" in file["name"].lower():  # needs extension
+            if ".html" in file["name"].lower():
+                continue
+            elif "voter.txt" in file["name"].lower() or ("a&i" and "partycampaignusevoterfile") in file["name"].lower(): 
                 logging.info(f"voter file found: {file['name']}")
                 voter_df = self.read_csv_count_error_lines(
                     file["obj"], sep="|", dtype="str", on_bad_lines="warn"
                 )
+                voter_df.rename(columns=self.config["rename_columns"], inplace=True)
                 voter_df_shape_before = voter_df.shape
                 voter_df.dropna(subset=["VOTER ID"], inplace=True)
                 voter_df_shape_after = voter_df.shape
@@ -121,9 +124,13 @@ def keep_most_recent_record(voters_df, cancelled_df):
                 logging.info(
                     f"Dropped {voter_df_shape_before[0] - voter_df_shape_after[0]} rows due to NaN county values"
                 )
+
+                #As of June 2026, there are no more reason codes in Maine, 
+                # The other missing columns are: 
+                if "REASON" not in voter_df.columns:
+                    voter_df["REASON"] = np.NAN
             elif (
                 "history" in file["name"].lower()
-                and ".html" not in file["name"].lower()
             ):
                 # Maine Voter History seems to come one file per election,
                 # Sometimes they have a history report html file that we skip
@@ -133,7 +140,7 @@ def keep_most_recent_record(voters_df, cancelled_df):
                 )
                 logging.info(f"concatenating {file['name']}")
                 hist_df = pd.concat([hist_df, new_hist])
-            elif "cancelled" in file["name"].lower():
+            elif "cancelled" or "cxl" in file["name"].lower():
                 # Note: the cancelled file does not have a county column
                 logging.info(f"cancelled file found: {file['name']}")