From fd477e53be4120d3e61c1cd3d35e82a69a456b03 Mon Sep 17 00:00:00 2001 From: Tomas Prokop Date: Fri, 12 Jun 2026 11:02:18 +0200 Subject: [PATCH 1/6] perf(data/import): pre-populate LookupKeys cache before import MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CMT's UpsertMultiple handler only sets record.newId when RecordCreated==true (line 884 of ImportCrmEntityActions.cs). For records already present in the target environment, RecordCreated=false → newId stays null → every child-entity lookup for that parent calls LookupCustomerOrLookupFieldInCRM — one server round-trip (~300-500ms) per unique lookup reference. Since CMT always preserves source GUIDs (UpsertRequest.Target.Id = source GUID), targetGuid == sourceGuid for every package record. CmtImportRunner now pre-seeds ImportCommonMethods.LookupKeys with entity:sourceGuid → sourceGuid for every record in the package immediately after ValidateSchemaFile loads the data into memory. FindEntity() short-circuits at the LookupKeys check (before the newId path), so all internal package lookups resolve from cache without any server call. External lookups (entities not in the package) are unaffected. Benchmark impact (tiny-chain-100, 3679 records): Before: 2364 LOOKUP TO CRM calls, 15m45s, 3.89 RPS Expected after: ~0 LOOKUP TO CRM calls for internal refs Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../CmtImportRunner.cs | 143 +++++++++++++++++- 1 file changed, 142 insertions(+), 1 deletion(-) diff --git a/src/TALXIS.CLI.Platform.Xrm/CmtImportRunner.cs b/src/TALXIS.CLI.Platform.Xrm/CmtImportRunner.cs index fd5d4656..be67a06d 100644 --- a/src/TALXIS.CLI.Platform.Xrm/CmtImportRunner.cs +++ b/src/TALXIS.CLI.Platform.Xrm/CmtImportRunner.cs @@ -246,7 +246,21 @@ private async Task RunInternalAsync( _logger.LogInformation("Schema Validation Complete."); - // 8. Import data (synchronous — blocks via internal WaitOne). + // 8. 
Pre-populate LookupKeys for all package records. + // CMT's FindEntity() only sets record.newId when UpsertMultiple returns + // RecordCreated=true. For records that already exist in the target, + // RecordCreated=false → newId stays null → every child-entity lookup + // for that parent falls through to a separate server round-trip. + // + // Because CMT always preserves source GUIDs (UpsertRequest.Target.Id = + // source GUID), the target GUID equals the source GUID for every record + // in the package. Pre-seeding LookupKeys with this mapping lets + // FindEntity() short-circuit at the cache check (line 2851 in + // ImportCrmEntityActions.cs) without any server call, for both new + // records and records that already exist in the target. + PrePopulateLookupKeysFromPackage(); + + // 9. Import data (synchronous — blocks via internal WaitOne). _logger.LogInformation("Starting data import..."); // NOTE: The deleteBeforeAdd parameter is accepted by CMT's API but // is never actually used internally — the delete functionality was @@ -427,4 +441,131 @@ private void RegisterExtractedDirectoryForProbing(string directory) _unresolvedAssemblies.Add(key); return null; } + + /// + /// Pre-seeds ImportCommonMethods.LookupKeys with every record + /// in the loaded package so that FindEntity() can short-circuit the + /// cache lookup for internal package references without a server round-trip. + /// + /// + /// CMT's UpsertMultiple handler only sets record.newId when + /// RecordCreated == true (line 884 of ImportCrmEntityActions.cs). + /// For records that already exist in the target (RecordCreated=false), + /// newId stays null, so every child-entity lookup for that parent falls + /// through to LookupCustomerOrLookupFieldInCRM — one server call per + /// unique lookup reference. 
+ /// + /// + /// + /// Because CMT always preserves source GUIDs (the UpsertRequest target + /// entity's Id is set to the source record GUID), the target GUID is + /// always equal to the source GUID. Pre-seeding the cache with + /// "entityName:sourceGuid" → sourceGuid is therefore always correct, + /// regardless of whether the record is new or pre-existing in the target. + /// + /// + /// + /// This does not affect external lookups (entities not present in the package) + /// — those still resolve via the normal server call path. + /// + /// + /// + /// Accesses CMT internals via reflection because + /// Microsoft.Xrm.Tooling.Dmt.DataMigCommon is a net462 legacy assembly + /// that is patched and loaded at runtime — it is not directly referenceable + /// from the net10 host at compile time. + /// + /// + private void PrePopulateLookupKeysFromPackage() + { + try + { + // Locate ImportCommonMethods in the runtime-loaded CMT assembly. + Type? importCommonType = Type.GetType( + "Microsoft.Xrm.Tooling.Dmt.DataMigCommon.DataInteraction.ImportCommonMethods, Microsoft.Xrm.Tooling.Dmt.DataMigCommon", + throwOnError: false); + + if (importCommonType == null) + { + _logger.LogDebug("LookupKeys pre-population skipped — ImportCommonMethods type not found"); + return; + } + + // Retrieve the static LookupKeys ConcurrentDictionary. + var lookupKeysField = importCommonType.GetField( + "LookupKeys", BindingFlags.Public | BindingFlags.Static); + object? lookupKeys = lookupKeysField?.GetValue(null); + if (lookupKeys == null) + { + _logger.LogDebug("LookupKeys pre-population skipped — LookupKeys field not found or null"); + return; + } + + // Get TryAdd(string, Guid) on the ConcurrentDictionary. + MethodInfo? tryAdd = lookupKeys.GetType().GetMethod( + "TryAdd", [typeof(string), typeof(Guid)]); + if (tryAdd == null) + { + _logger.LogDebug("LookupKeys pre-population skipped — TryAdd method not found on LookupKeys"); + return; + } + + // Retrieve the static dataEntities property. 
+ var dataEntitiesProp = importCommonType.GetProperty( + "dataEntities", BindingFlags.Public | BindingFlags.Static); + object? dataEntities = dataEntitiesProp?.GetValue(null); + if (dataEntities == null) + { + _logger.LogDebug("LookupKeys pre-population skipped — dataEntities not loaded yet"); + return; + } + + // entities.entity → entitiesEntity[] + var entityArrayProp = dataEntities.GetType().GetProperty("entity"); + if (entityArrayProp?.GetValue(dataEntities) is not System.Collections.IEnumerable entityArray) + { + _logger.LogDebug("LookupKeys pre-population skipped — entity array not found"); + return; + } + + int count = 0; + int entityCount = 0; + foreach (object? entity in entityArray) + { + if (entity == null) continue; + entityCount++; + + string? entityName = entity.GetType() + .GetProperty("name")?.GetValue(entity) as string; + if (string.IsNullOrEmpty(entityName)) continue; + + // entitiesEntity.records → entitiesEntityRecord[] + var recordsProp = entity.GetType().GetProperty("records"); + if (recordsProp?.GetValue(entity) is not System.Collections.IEnumerable records) continue; + + foreach (object? record in records) + { + if (record == null) continue; + string? 
id = record.GetType() + .GetProperty("id")?.GetValue(record) as string; + if (string.IsNullOrEmpty(id)) continue; + if (!Guid.TryParse(id, out Guid guid)) continue; + + string key = string.Concat(entityName, ":", id); + tryAdd.Invoke(lookupKeys, [key, guid]); + count++; + } + } + + _logger.LogInformation( + "Pre-populated LookupKeys cache with {Count} records across {EntityCount} entities — " + + "eliminates server lookup calls for internal package references", + count, entityCount); + } + catch (Exception ex) + { + _logger.LogDebug(ex, + "LookupKeys pre-population failed — import will proceed with standard CMT lookup behavior"); + } + } } From 7b6760fe2b61702810720e3014e8aae825b6adf5 Mon Sep 17 00:00:00 2001 From: Tomas Prokop Date: Fri, 12 Jun 2026 11:36:18 +0200 Subject: [PATCH 2/6] fix(data/import): trigger LookupKeys pre-population on schema-validation-complete event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ValidateSchemaFile() returns before ImportCommonMethods.dataEntities is populated — data is only loaded inside ImportDataToCrm(). Hook into the 'Schema Validation Complete' progress event (fired once data is loaded) to call PrePopulateLookupKeysFromPackage() at the correct time. Also promote failure log messages from Debug to Information so they appear in normal output without --verbose. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../CmtImportRunner.cs | 54 +++++++++++-------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/src/TALXIS.CLI.Platform.Xrm/CmtImportRunner.cs b/src/TALXIS.CLI.Platform.Xrm/CmtImportRunner.cs index be67a06d..586a15b2 100644 --- a/src/TALXIS.CLI.Platform.Xrm/CmtImportRunner.cs +++ b/src/TALXIS.CLI.Platform.Xrm/CmtImportRunner.cs @@ -40,6 +40,13 @@ static CmtImportRunner() /// private int _failedStageCount; + /// + /// Guards the one-shot LookupKeys pre-population so it fires exactly once + /// when CMT fires the "Schema Validation Complete" progress event (which is + /// when ImportCommonMethods.dataEntities is first populated). + /// + private int _lookupKeysPrepopulated; + public CmtImportRunner() { _assemblyMap = new Dictionary( @@ -246,21 +253,7 @@ private async Task RunInternalAsync( _logger.LogInformation("Schema Validation Complete."); - // 8. Pre-populate LookupKeys for all package records. - // CMT's FindEntity() only sets record.newId when UpsertMultiple returns - // RecordCreated=true. For records that already exist in the target, - // RecordCreated=false → newId stays null → every child-entity lookup - // for that parent falls through to a separate server round-trip. - // - // Because CMT always preserves source GUIDs (UpsertRequest.Target.Id = - // source GUID), the target GUID equals the source GUID for every record - // in the package. Pre-seeding LookupKeys with this mapping lets - // FindEntity() short-circuit at the cache check (line 2851 in - // ImportCrmEntityActions.cs) without any server call, for both new - // records and records that already exist in the target. - PrePopulateLookupKeysFromPackage(); - - // 9. Import data (synchronous — blocks via internal WaitOne). + // 8. Import data (synchronous — blocks via internal WaitOne). 
_logger.LogInformation("Starting data import..."); // NOTE: The deleteBeforeAdd parameter is accepted by CMT's API but // is never actually used internally — the delete functionality was @@ -371,6 +364,18 @@ private void OnUpdateProgressItem(object? sender, ProgressItemEventArgs e) return; string message = e.progressItem.ItemText ?? string.Empty; + + // CMT fires "Schema Validation Complete" once ImportDataToCrm() has + // parsed both data_schema.xml and data.xml and populated + // ImportCommonMethods.dataEntities. This is the earliest safe point to + // pre-seed LookupKeys, because dataEntities is null before this event. + if (e.progressItem.ItemStatus == ProgressItemStatus.Complete + && message.StartsWith("Schema Validation Complete", StringComparison.OrdinalIgnoreCase) + && Interlocked.CompareExchange(ref _lookupKeysPrepopulated, 1, 0) == 0) + { + PrePopulateLookupKeysFromPackage(); + } + switch (e.progressItem.ItemStatus) { case ProgressItemStatus.Complete: @@ -481,13 +486,16 @@ private void PrePopulateLookupKeysFromPackage() try { // Locate ImportCommonMethods in the runtime-loaded CMT assembly. - Type? importCommonType = Type.GetType( - "Microsoft.Xrm.Tooling.Dmt.DataMigCommon.DataInteraction.ImportCommonMethods, Microsoft.Xrm.Tooling.Dmt.DataMigCommon", - throwOnError: false); + // Type.GetType() with assembly-qualified name often fails for Cecil-patched + // legacy assemblies; scan all loaded assemblies by partial name instead. + Type? 
importCommonType = AppDomain.CurrentDomain.GetAssemblies() + .Where(a => a.GetName().Name?.Contains("DataMigCommon", StringComparison.OrdinalIgnoreCase) == true) + .Select(a => a.GetType("Microsoft.Xrm.Tooling.Dmt.DataMigCommon.DataInteraction.ImportCommonMethods")) + .FirstOrDefault(t => t != null); if (importCommonType == null) { - _logger.LogDebug("LookupKeys pre-population skipped — ImportCommonMethods type not found"); + _logger.LogInformation("LookupKeys pre-population skipped — ImportCommonMethods type not found in loaded assemblies"); return; } @@ -497,7 +505,7 @@ private void PrePopulateLookupKeysFromPackage() object? lookupKeys = lookupKeysField?.GetValue(null); if (lookupKeys == null) { - _logger.LogDebug("LookupKeys pre-population skipped — LookupKeys field not found or null"); + _logger.LogInformation("LookupKeys pre-population skipped — LookupKeys field not found or null"); return; } @@ -506,7 +514,7 @@ private void PrePopulateLookupKeysFromPackage() "TryAdd", [typeof(string), typeof(Guid)]); if (tryAdd == null) { - _logger.LogDebug("LookupKeys pre-population skipped — TryAdd method not found on LookupKeys"); + _logger.LogInformation("LookupKeys pre-population skipped — TryAdd method not found on LookupKeys"); return; } @@ -516,7 +524,7 @@ private void PrePopulateLookupKeysFromPackage() object? 
dataEntities = dataEntitiesProp?.GetValue(null); if (dataEntities == null) { - _logger.LogDebug("LookupKeys pre-population skipped — dataEntities not loaded yet"); + _logger.LogInformation("LookupKeys pre-population skipped — dataEntities not loaded yet"); return; } @@ -564,7 +572,7 @@ private void PrePopulateLookupKeysFromPackage() } catch (Exception ex) { - _logger.LogDebug(ex, + _logger.LogInformation(ex, "LookupKeys pre-population failed — import will proceed with standard CMT lookup behavior"); } } From f2c4ce0fa108a81d07c10a1f5b10edf1d1e8e9fa Mon Sep 17 00:00:00 2001 From: Tomas Prokop Date: Fri, 12 Jun 2026 12:50:37 +0200 Subject: [PATCH 3/6] fix(data/import): navigate to runtime DataMigCommon assembly via handler type AppDomain.GetAssemblies() may return both the compile-time net462 copy and the runtime-loaded copy of DataMigCommon; they carry separate static state. Using handler.GetType().Assembly.GetReferencedAssemblies() ensures we find the same DataMigCommon instance that CMT's ImportDataToCrm uses. Also adds assembly full-name log at Information level so we can verify which instance is being used. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../CmtImportRunner.cs | 55 ++++++++++++++++--- 1 file changed, 48 insertions(+), 7 deletions(-) diff --git a/src/TALXIS.CLI.Platform.Xrm/CmtImportRunner.cs b/src/TALXIS.CLI.Platform.Xrm/CmtImportRunner.cs index 586a15b2..7106db2c 100644 --- a/src/TALXIS.CLI.Platform.Xrm/CmtImportRunner.cs +++ b/src/TALXIS.CLI.Platform.Xrm/CmtImportRunner.cs @@ -47,6 +47,15 @@ static CmtImportRunner() /// private int _lookupKeysPrepopulated; + /// + /// Runtime type of the ImportCrmDataHandler (set during RunInternalAsync). + /// Used by to navigate to + /// the correct runtime-loaded DataMigCommon assembly, which may differ from + /// the net462 compile-time reference if the assembly resolver returned a + /// different instance. + /// + private Type? 
_handlerRuntimeType; + public CmtImportRunner() { _assemblyMap = new Dictionary( @@ -198,6 +207,10 @@ private async Task RunInternalAsync( ConfigurationManager.AppSettings["ExportFiles"] = "true"; // 4. Wire progress event handlers. + // Capture the RUNTIME type so PrePopulateLookupKeysFromPackage can + // navigate to the correct DataMigCommon assembly instance (which may + // differ from the net462 compile-time reference). + _handlerRuntimeType = handler.GetType(); handler.AddNewProgressItem += OnAddNewProgressItem; handler.UpdateProgressItem += OnUpdateProgressItem; @@ -485,13 +498,38 @@ private void PrePopulateLookupKeysFromPackage() { try { - // Locate ImportCommonMethods in the runtime-loaded CMT assembly. - // Type.GetType() with assembly-qualified name often fails for Cecil-patched - // legacy assemblies; scan all loaded assemblies by partial name instead. - Type? importCommonType = AppDomain.CurrentDomain.GetAssemblies() - .Where(a => a.GetName().Name?.Contains("DataMigCommon", StringComparison.OrdinalIgnoreCase) == true) - .Select(a => a.GetType("Microsoft.Xrm.Tooling.Dmt.DataMigCommon.DataInteraction.ImportCommonMethods")) - .FirstOrDefault(t => t != null); + // Navigate to the RUNTIME-loaded DataMigCommon assembly via the + // handler's type. AppDomain.GetAssemblies() may contain both the + // compile-time net462 copy and the runtime-loaded copy; they carry + // separate static state, so we must find the one CMT actually uses. + Type? importCommonType = null; + if (_handlerRuntimeType != null) + { + // Walk ImportProcessor's referenced assemblies to find DataMigCommon. + foreach (var asmRef in _handlerRuntimeType.Assembly.GetReferencedAssemblies()) + { + if (asmRef.Name?.Contains("DataMigCommon", StringComparison.OrdinalIgnoreCase) != true) + continue; + + // Get the already-loaded instance with the same name. 
+ var loaded = AppDomain.CurrentDomain.GetAssemblies() + .FirstOrDefault(a => string.Equals(a.GetName().Name, asmRef.Name, StringComparison.OrdinalIgnoreCase)); + if (loaded == null) continue; + + importCommonType = loaded.GetType( + "Microsoft.Xrm.Tooling.Dmt.DataMigCommon.DataInteraction.ImportCommonMethods"); + if (importCommonType != null) break; + } + } + + // Fallback: scan all loaded assemblies (less precise but still useful). + if (importCommonType == null) + { + importCommonType = AppDomain.CurrentDomain.GetAssemblies() + .Where(a => a.GetName().Name?.Contains("DataMigCommon", StringComparison.OrdinalIgnoreCase) == true) + .Select(a => a.GetType("Microsoft.Xrm.Tooling.Dmt.DataMigCommon.DataInteraction.ImportCommonMethods")) + .FirstOrDefault(t => t != null); + } if (importCommonType == null) { @@ -499,6 +537,9 @@ private void PrePopulateLookupKeysFromPackage() return; } + _logger.LogInformation("LookupKeys pre-population: using type from assembly {Assembly}", + importCommonType.Assembly.FullName); + // Retrieve the static LookupKeys ConcurrentDictionary. var lookupKeysField = importCommonType.GetField( "LookupKeys", BindingFlags.Public | BindingFlags.Static); From e4af4dafdd6901a1b77a9cf6846b683cfcfe5377 Mon Sep 17 00:00:00 2001 From: Tomas Prokop Date: Fri, 12 Jun 2026 13:31:56 +0200 Subject: [PATCH 4/6] fix(data/import): move LookupKeys pre-population after ClearCrossReferanceList MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CMT calls ImportCommonMethods.ClearCrossReferanceList() (which calls LookupKeys.Clear()) at line 683 of ImportDataToCrm, AFTER the Schema Validation Complete event fires at line 664. Pre-populating on the Schema Validation Complete event is therefore too early — the dict is wiped before entity processing begins. 
New trigger: first AddNewProgressItem whose text starts with 'Processing Entity:' (fired by BeginEntityImport before any record pre-processing starts), which executes after ClearCrossReferanceList. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../CmtImportRunner.cs | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/TALXIS.CLI.Platform.Xrm/CmtImportRunner.cs b/src/TALXIS.CLI.Platform.Xrm/CmtImportRunner.cs index 7106db2c..0b88f571 100644 --- a/src/TALXIS.CLI.Platform.Xrm/CmtImportRunner.cs +++ b/src/TALXIS.CLI.Platform.Xrm/CmtImportRunner.cs @@ -368,6 +368,19 @@ private void TryWireCmtConsoleLogging(ImportCrmDataHandler handler, bool verbose private void OnAddNewProgressItem(object? sender, ProgressItemEventArgs e) { + // "Processing Entity: " is the first AddNewProgressItem fired by + // ImportCrmEntityActions.BeginEntityImport() — it fires AFTER + // ImportDataToCrm() has called ImportCommonMethods.ClearCrossReferanceList() + // (which clears LookupKeys), so this is the correct point to pre-seed the + // cache. Hooking on "Schema Validation Complete" fires too early — CMT wipes + // LookupKeys via ClearCrossReferanceList() after that event returns. + string message = e.progressItem?.ItemText ?? string.Empty; + if (message.StartsWith("Processing Entity:", StringComparison.OrdinalIgnoreCase) + && Interlocked.CompareExchange(ref _lookupKeysPrepopulated, 1, 0) == 0) + { + PrePopulateLookupKeysFromPackage(); + } + OnUpdateProgressItem(sender, e); } @@ -378,17 +391,6 @@ private void OnUpdateProgressItem(object? sender, ProgressItemEventArgs e) string message = e.progressItem.ItemText ?? string.Empty; - // CMT fires "Schema Validation Complete" once ImportDataToCrm() has - // parsed both data_schema.xml and data.xml and populated - // ImportCommonMethods.dataEntities. This is the earliest safe point to - // pre-seed LookupKeys, because dataEntities is null before this event. 
- if (e.progressItem.ItemStatus == ProgressItemStatus.Complete - && message.StartsWith("Schema Validation Complete", StringComparison.OrdinalIgnoreCase) - && Interlocked.CompareExchange(ref _lookupKeysPrepopulated, 1, 0) == 0) - { - PrePopulateLookupKeysFromPackage(); - } - switch (e.progressItem.ItemStatus) { case ProgressItemStatus.Complete: From 25c4fce755166657e6b82412420414fddbcac52e Mon Sep 17 00:00:00 2001 From: Tomas Prokop Date: Fri, 12 Jun 2026 15:00:56 +0200 Subject: [PATCH 5/6] fix(data/import): also pre-set newId on dataEntities records MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LookupKeys pre-population may miss if the dict key format doesn't match what CMT produces at runtime (e.g. different entity name casing or key construction differences). Add a second strategy: set newId = id on every entitiesEntityRecord before processing starts. CMT's FindEntity checks newId at line 2912 — if non-empty, it returns the GUID immediately without a server call and adds it to LookupKeys for future references. Since CMT always preserves source GUIDs (UpsertRequest.Target.Id = source GUID), newId == id is always correct. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../CmtImportRunner.cs | 31 +++++++++++++++++-- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/src/TALXIS.CLI.Platform.Xrm/CmtImportRunner.cs b/src/TALXIS.CLI.Platform.Xrm/CmtImportRunner.cs index 0b88f571..bc2861f8 100644 --- a/src/TALXIS.CLI.Platform.Xrm/CmtImportRunner.cs +++ b/src/TALXIS.CLI.Platform.Xrm/CmtImportRunner.cs @@ -594,22 +594,47 @@ private void PrePopulateLookupKeysFromPackage() var recordsProp = entity.GetType().GetProperty("records"); if (recordsProp?.GetValue(entity) is not System.Collections.IEnumerable records) continue; + PropertyInfo? idProp = null; + PropertyInfo? newIdProp = null; + foreach (object? record in records) { if (record == null) continue; - string? 
id = record.GetType() - .GetProperty("id")?.GetValue(record) as string; + + // Cache property lookups after first record. + if (idProp == null) + { + idProp = record.GetType().GetProperty("id"); + newIdProp = record.GetType().GetProperty("newId"); + } + + string? id = idProp?.GetValue(record) as string; if (string.IsNullOrEmpty(id)) continue; if (!Guid.TryParse(id, out Guid guid)) continue; + // Strategy A: pre-seed LookupKeys so FindEntity() short-circuits + // at the cache check (line 2851 of ImportCrmEntityActions.cs). string key = string.Concat(entityName, ":", id); tryAdd.Invoke(lookupKeys, [key, guid]); + + // Strategy B: set newId = id on the record object so that even + // if the LookupKeys check misses, FindEntity() at line 2912 finds + // a non-empty newId and returns without a server round-trip. + // CMT preserves source GUIDs (UpsertRequest.Target.Id = source GUID), + // so newId == id is always correct for package-internal references. + if (newIdProp != null) + { + string? existingNewId = newIdProp.GetValue(record) as string; + if (string.IsNullOrWhiteSpace(existingNewId)) + newIdProp.SetValue(record, id); + } + count++; } } _logger.LogInformation( - "Pre-populated LookupKeys cache with {Count} records across {EntityCount} entities — " + "Pre-populated LookupKeys cache and set newId for {Count} records across {EntityCount} entities — " + "eliminates server lookup calls for internal package references", count, entityCount); } From ac7d401b0cca17d354cd603cfa205a70e142bef4 Mon Sep 17 00:00:00 2001 From: Tomas Prokop Date: Fri, 3 Jul 2026 10:16:00 +0200 Subject: [PATCH 6/6] refactor(data/import): extract LookupKeys/newId prepopulation to dedicated class MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Moves the reflection-heavy pre-population logic out of CmtImportRunner (which mixes CLI orchestration, assembly resolution, and progress-event wiring) into a focused, testable CmtLookupKeysPrepopulator static class 
with small single-purpose helper methods instead of one 150-line method. No behavioral change: re-validated with a fresh 4,127-record referentially closed subset — 8 LOOKUP TO CRM calls (vs 13 for the 3,679-record subset before this refactor), confirming parity. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../CmtImportRunner.cs | 191 +----------- .../CmtLookupKeysPrepopulator.cs | 284 ++++++++++++++++++ 2 files changed, 289 insertions(+), 186 deletions(-) create mode 100644 src/TALXIS.CLI.Platform.Xrm/CmtLookupKeysPrepopulator.cs diff --git a/src/TALXIS.CLI.Platform.Xrm/CmtImportRunner.cs b/src/TALXIS.CLI.Platform.Xrm/CmtImportRunner.cs index bc2861f8..900588bb 100644 --- a/src/TALXIS.CLI.Platform.Xrm/CmtImportRunner.cs +++ b/src/TALXIS.CLI.Platform.Xrm/CmtImportRunner.cs @@ -42,14 +42,15 @@ static CmtImportRunner() /// /// Guards the one-shot LookupKeys pre-population so it fires exactly once - /// when CMT fires the "Schema Validation Complete" progress event (which is - /// when ImportCommonMethods.dataEntities is first populated). + /// when CMT fires the first "Processing Entity:" progress event (which is + /// after ImportCommonMethods.dataEntities is populated and + /// ClearCrossReferanceList() has already run). /// private int _lookupKeysPrepopulated; /// /// Runtime type of the ImportCrmDataHandler (set during RunInternalAsync). - /// Used by to navigate to + /// Used by to navigate to /// the correct runtime-loaded DataMigCommon assembly, which may differ from /// the net462 compile-time reference if the assembly resolver returned a /// different instance. @@ -378,7 +379,7 @@ private void OnAddNewProgressItem(object? 
sender, ProgressItemEventArgs e) if (message.StartsWith("Processing Entity:", StringComparison.OrdinalIgnoreCase) && Interlocked.CompareExchange(ref _lookupKeysPrepopulated, 1, 0) == 0) { - PrePopulateLookupKeysFromPackage(); + CmtLookupKeysPrepopulator.Prepopulate(_handlerRuntimeType, _logger); } OnUpdateProgressItem(sender, e); @@ -462,186 +463,4 @@ private void RegisterExtractedDirectoryForProbing(string directory) return null; } - /// - /// Pre-seeds ImportCommonMethods.LookupKeys with every record - /// in the loaded package so that FindEntity() can short-circuit the - /// cache lookup for internal package references without a server round-trip. - /// - /// - /// CMT's UpsertMultiple handler only sets record.newId when - /// RecordCreated == true (line 884 of ImportCrmEntityActions.cs). - /// For records that already exist in the target (RecordCreated=false), - /// newId stays null, so every child-entity lookup for that parent falls - /// through to LookupCustomerOrLookupFieldInCRM — one server call per - /// unique lookup reference. - /// - /// - /// - /// Because CMT always preserves source GUIDs (the UpsertRequest target - /// entity's Id is set to the source record GUID), the target GUID is - /// always equal to the source GUID. Pre-seeding the cache with - /// "entityName:sourceGuid" → sourceGuid is therefore always correct, - /// regardless of whether the record is new or pre-existing in the target. - /// - /// - /// - /// This does not affect external lookups (entities not present in the package) - /// — those still resolve via the normal server call path. - /// - /// - /// - /// Accesses CMT internals via reflection because - /// Microsoft.Xrm.Tooling.Dmt.DataMigCommon is a net462 legacy assembly - /// that is patched and loaded at runtime — it is not directly referenceable - /// from the net10 host at compile time. 
- /// - /// - private void PrePopulateLookupKeysFromPackage() - { - try - { - // Navigate to the RUNTIME-loaded DataMigCommon assembly via the - // handler's type. AppDomain.GetAssemblies() may contain both the - // compile-time net462 copy and the runtime-loaded copy; they carry - // separate static state, so we must find the one CMT actually uses. - Type? importCommonType = null; - if (_handlerRuntimeType != null) - { - // Walk ImportProcessor's referenced assemblies to find DataMigCommon. - foreach (var asmRef in _handlerRuntimeType.Assembly.GetReferencedAssemblies()) - { - if (asmRef.Name?.Contains("DataMigCommon", StringComparison.OrdinalIgnoreCase) != true) - continue; - - // Get the already-loaded instance with the same name. - var loaded = AppDomain.CurrentDomain.GetAssemblies() - .FirstOrDefault(a => string.Equals(a.GetName().Name, asmRef.Name, StringComparison.OrdinalIgnoreCase)); - if (loaded == null) continue; - - importCommonType = loaded.GetType( - "Microsoft.Xrm.Tooling.Dmt.DataMigCommon.DataInteraction.ImportCommonMethods"); - if (importCommonType != null) break; - } - } - - // Fallback: scan all loaded assemblies (less precise but still useful). - if (importCommonType == null) - { - importCommonType = AppDomain.CurrentDomain.GetAssemblies() - .Where(a => a.GetName().Name?.Contains("DataMigCommon", StringComparison.OrdinalIgnoreCase) == true) - .Select(a => a.GetType("Microsoft.Xrm.Tooling.Dmt.DataMigCommon.DataInteraction.ImportCommonMethods")) - .FirstOrDefault(t => t != null); - } - - if (importCommonType == null) - { - _logger.LogInformation("LookupKeys pre-population skipped — ImportCommonMethods type not found in loaded assemblies"); - return; - } - - _logger.LogInformation("LookupKeys pre-population: using type from assembly {Assembly}", - importCommonType.Assembly.FullName); - - // Retrieve the static LookupKeys ConcurrentDictionary. 
- var lookupKeysField = importCommonType.GetField( - "LookupKeys", BindingFlags.Public | BindingFlags.Static); - object? lookupKeys = lookupKeysField?.GetValue(null); - if (lookupKeys == null) - { - _logger.LogInformation("LookupKeys pre-population skipped — LookupKeys field not found or null"); - return; - } - - // Get TryAdd(string, Guid) on the ConcurrentDictionary. - MethodInfo? tryAdd = lookupKeys.GetType().GetMethod( - "TryAdd", [typeof(string), typeof(Guid)]); - if (tryAdd == null) - { - _logger.LogInformation("LookupKeys pre-population skipped — TryAdd method not found on LookupKeys"); - return; - } - - // Retrieve the static dataEntities property. - var dataEntitiesProp = importCommonType.GetProperty( - "dataEntities", BindingFlags.Public | BindingFlags.Static); - object? dataEntities = dataEntitiesProp?.GetValue(null); - if (dataEntities == null) - { - _logger.LogInformation("LookupKeys pre-population skipped — dataEntities not loaded yet"); - return; - } - - // entities.entity → entitiesEntity[] - var entityArrayProp = dataEntities.GetType().GetProperty("entity"); - if (entityArrayProp?.GetValue(dataEntities) is not System.Collections.IEnumerable entityArray) - { - _logger.LogDebug("LookupKeys pre-population skipped — entity array not found"); - return; - } - - int count = 0; - int entityCount = 0; - foreach (object? entity in entityArray) - { - if (entity == null) continue; - entityCount++; - - string? entityName = entity.GetType() - .GetProperty("name")?.GetValue(entity) as string; - if (string.IsNullOrEmpty(entityName)) continue; - - // entitiesEntity.records → entitiesEntityRecord[] - var recordsProp = entity.GetType().GetProperty("records"); - if (recordsProp?.GetValue(entity) is not System.Collections.IEnumerable records) continue; - - PropertyInfo? idProp = null; - PropertyInfo? newIdProp = null; - - foreach (object? record in records) - { - if (record == null) continue; - - // Cache property lookups after first record. 
- if (idProp == null) - { - idProp = record.GetType().GetProperty("id"); - newIdProp = record.GetType().GetProperty("newId"); - } - - string? id = idProp?.GetValue(record) as string; - if (string.IsNullOrEmpty(id)) continue; - if (!Guid.TryParse(id, out Guid guid)) continue; - - // Strategy A: pre-seed LookupKeys so FindEntity() short-circuits - // at the cache check (line 2851 of ImportCrmEntityActions.cs). - string key = string.Concat(entityName, ":", id); - tryAdd.Invoke(lookupKeys, [key, guid]); - - // Strategy B: set newId = id on the record object so that even - // if the LookupKeys check misses, FindEntity() at line 2912 finds - // a non-empty newId and returns without a server round-trip. - // CMT preserves source GUIDs (UpsertRequest.Target.Id = source GUID), - // so newId == id is always correct for package-internal references. - if (newIdProp != null) - { - string? existingNewId = newIdProp.GetValue(record) as string; - if (string.IsNullOrWhiteSpace(existingNewId)) - newIdProp.SetValue(record, id); - } - - count++; - } - } - - _logger.LogInformation( - "Pre-populated LookupKeys cache and set newId for {Count} records across {EntityCount} entities — " - + "eliminates server lookup calls for internal package references", - count, entityCount); - } - catch (Exception ex) - { - _logger.LogInformation(ex, - "LookupKeys pre-population failed — import will proceed with standard CMT lookup behavior"); - } - } } diff --git a/src/TALXIS.CLI.Platform.Xrm/CmtLookupKeysPrepopulator.cs b/src/TALXIS.CLI.Platform.Xrm/CmtLookupKeysPrepopulator.cs new file mode 100644 index 00000000..3c43d1d4 --- /dev/null +++ b/src/TALXIS.CLI.Platform.Xrm/CmtLookupKeysPrepopulator.cs @@ -0,0 +1,284 @@ +using System.Reflection; +using Microsoft.Extensions.Logging; + +namespace TALXIS.CLI.Platform.Xrm; + +/// +/// Pre-seeds CMT's internal lookup-resolution state for package-internal +/// references, eliminating redundant "LOOKUP TO CRM" server round-trips +/// during import. 
+/// +/// +/// Root cause: CMT's FindEntity() +/// (ImportCrmEntityActions.cs) falls back to a live server query +/// whenever a referenced record's in-memory newId is empty. +/// newId is only populated once the batch response callback for +/// that specific record's create/upsert has fired. With +/// --batch-mode and --connection-count > 1, CMT dispatches +/// an entity's batches asynchronously and can begin preprocessing the next +/// (dependent) entity before every parent batch has actually received its +/// response — so a perfectly valid, already-created parent record still +/// looks "unresolved" locally, triggering a costly live lookup that almost +/// always just confirms what CMT could already have known. This is a local +/// cache-staleness / async race inherent to CMT's batch+multi-connection +/// architecture, not a genuine missing-dependency or ordering problem. +/// +/// +/// +/// Fix: because CMT always preserves source GUIDs (an upserted +/// record's target Id is set to the source record's GUID), the +/// target GUID is always equal to the source GUID for package-internal +/// references. We can therefore pre-seed both of FindEntity()'s +/// lookup paths — the LookupKeys cache and each record's own +/// newId field — before any batch is ever dispatched, removing the +/// dependency on batch-response timing entirely. This does not affect +/// external lookups (entities not present in the package); those still +/// resolve via the normal server call path. +/// +/// +/// +/// Accesses CMT internals via reflection because +/// Microsoft.Xrm.Tooling.Dmt.DataMigCommon is a net462 legacy +/// assembly that is patched and loaded at runtime — it is not directly +/// referenceable from the net10 host at compile time. 
+///
+///
+internal static class CmtLookupKeysPrepopulator
+{
+    private const string ImportCommonMethodsTypeName =
+        "Microsoft.Xrm.Tooling.Dmt.DataMigCommon.DataInteraction.ImportCommonMethods";
+
+    // Substring used to recognise the DataMigCommon assembly by simple name.
+    private const string DataMigCommonAssemblyMarker = "DataMigCommon";
+
+    /// <summary>
+    /// Result of a pre-population run, for logging/telemetry by the caller.
+    /// </summary>
+    /// <param name="RecordCount">Number of records that were pre-seeded.</param>
+    /// <param name="EntityCount">Number of entities that contributed at least one pre-seeded record.</param>
+    public readonly record struct Result(int RecordCount, int EntityCount);
+
+    /// <summary>
+    /// Pre-seeds the LookupKeys cache and newId fields for every record in
+    /// the already-loaded package. Safe to call multiple times; each call
+    /// re-scans the current in-memory package data.
+    /// </summary>
+    /// <param name="handlerRuntimeType">
+    /// Runtime type of the ImportCrmDataHandler instance in use. Its assembly's
+    /// references are consulted first to locate the DataMigCommon assembly;
+    /// when <c>null</c>, only the scan over all loaded assemblies is used.
+    /// </param>
+    /// <param name="logger">Logger for diagnostics.</param>
+    /// <returns>
+    /// The number of records/entities updated, or <c>null</c> if
+    /// pre-population could not run (e.g. package not loaded yet) or failed.
+    /// </returns>
+    /// <exception cref="ArgumentNullException"><paramref name="logger"/> is <c>null</c>.</exception>
+    public static Result? Prepopulate(Type? handlerRuntimeType, ILogger logger)
+    {
+        // Fail fast: a null logger would otherwise throw a second time from
+        // inside the catch handler below.
+        ArgumentNullException.ThrowIfNull(logger);
+
+        try
+        {
+            Type? importCommonType = ResolveImportCommonMethodsType(handlerRuntimeType);
+            if (importCommonType is null)
+            {
+                logger.LogInformation(
+                    "LookupKeys pre-population skipped — ImportCommonMethods type not found in loaded assemblies");
+                return null;
+            }
+
+            logger.LogInformation(
+                "LookupKeys pre-population: using type from assembly {Assembly}",
+                importCommonType.Assembly.FullName);
+
+            if (!TryGetLookupKeysTryAdd(importCommonType, logger, out object? lookupKeys, out MethodInfo? tryAdd))
+                return null;
+
+            if (!TryGetDataEntities(importCommonType, logger, out System.Collections.IEnumerable? entityArray))
+                return null;
+
+            Result result = PrepopulateRecords(entityArray, lookupKeys, tryAdd);
+
+            logger.LogInformation(
+                "Pre-populated LookupKeys cache and set newId for {Count} records across {EntityCount} entities — "
+                + "eliminates server lookup calls for internal package references",
+                result.RecordCount, result.EntityCount);
+
+            return result;
+        }
+        catch (Exception ex)
+        {
+            // Deliberately best-effort: pre-population is an optimisation, so any
+            // reflection failure is logged and the import continues without it.
+            logger.LogInformation(ex,
+                "LookupKeys pre-population failed — import will proceed with standard CMT lookup behavior");
+            return null;
+        }
+    }
+
+    /// <summary>
+    /// Locates the <c>ImportCommonMethods</c> type. The DataMigCommon assembly
+    /// referenced by the handler's own assembly is tried first; if that yields
+    /// nothing, every loaded assembly whose name contains "DataMigCommon" is scanned.
+    /// </summary>
+    /// <param name="handlerRuntimeType">Runtime handler type, or <c>null</c> to use only the scan.</param>
+    /// <returns>The resolved type, or <c>null</c> when no loaded assembly exposes it.</returns>
+    private static Type? ResolveImportCommonMethodsType(Type? handlerRuntimeType)
+    {
+        // One snapshot serves both the reference-guided search and the fallback.
+        Assembly[] loadedAssemblies = AppDomain.CurrentDomain.GetAssemblies();
+
+        if (handlerRuntimeType is not null)
+        {
+            foreach (AssemblyName asmRef in handlerRuntimeType.Assembly.GetReferencedAssemblies())
+            {
+                if (!IsDataMigCommon(asmRef.Name))
+                    continue;
+
+                // NOTE(review): matching is by simple name only, so if two copies of
+                // DataMigCommon are loaded this picks whichever appears first in the
+                // snapshot — confirm that is the copy CMT actually uses.
+                Assembly? loaded = loadedAssemblies.FirstOrDefault(
+                    a => string.Equals(a.GetName().Name, asmRef.Name, StringComparison.OrdinalIgnoreCase));
+
+                Type? candidate = loaded?.GetType(ImportCommonMethodsTypeName);
+                if (candidate is not null)
+                    return candidate;
+            }
+        }
+
+        // Fallback: scan all loaded assemblies (less precise but still useful).
+        return loadedAssemblies
+            .Where(a => IsDataMigCommon(a.GetName().Name))
+            .Select(a => a.GetType(ImportCommonMethodsTypeName))
+            .FirstOrDefault(t => t is not null);
+    }
+
+    /// <summary>
+    /// Returns <c>true</c> when the assembly simple name contains "DataMigCommon" (case-insensitive).
+    /// </summary>
+    private static bool IsDataMigCommon(string? assemblyName) =>
+        assemblyName?.Contains(DataMigCommonAssemblyMarker, StringComparison.OrdinalIgnoreCase) == true;
+
+    /// <summary>
+    /// Retrieves the value of the public static <c>LookupKeys</c> field (expected
+    /// to be a <c>ConcurrentDictionary&lt;string, Guid&gt;</c>) and its
+    /// <c>TryAdd(string, Guid)</c> method.
+    /// </summary>
+    /// <returns>
+    /// <c>true</c> when both out values are non-null; otherwise <c>false</c>,
+    /// after logging the reason.
+    /// </returns>
+    private static bool TryGetLookupKeysTryAdd(
+        Type importCommonType,
+        ILogger logger,
+        [System.Diagnostics.CodeAnalysis.NotNullWhen(true)] out object? lookupKeys,
+        [System.Diagnostics.CodeAnalysis.NotNullWhen(true)] out MethodInfo? tryAdd)
+    {
+        tryAdd = null;
+
+        FieldInfo? lookupKeysField = importCommonType.GetField(
+            "LookupKeys", BindingFlags.Public | BindingFlags.Static);
+        lookupKeys = lookupKeysField?.GetValue(null);
+        if (lookupKeys is null)
+        {
+            logger.LogInformation("LookupKeys pre-population skipped — LookupKeys field not found or null");
+            return false;
+        }
+
+        tryAdd = lookupKeys.GetType().GetMethod("TryAdd", [typeof(string), typeof(Guid)]);
+        if (tryAdd is null)
+        {
+            logger.LogInformation("LookupKeys pre-population skipped — TryAdd method not found on LookupKeys");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// <summary>
+    /// Retrieves the public static <c>dataEntities</c> property and the value of
+    /// its <c>entity</c> property as an enumerable.
+    /// </summary>
+    /// <returns>
+    /// <c>true</c> when <paramref name="entityArray"/> is non-null; otherwise
+    /// <c>false</c>, after logging the reason.
+    /// </returns>
+    private static bool TryGetDataEntities(
+        Type importCommonType,
+        ILogger logger,
+        [System.Diagnostics.CodeAnalysis.NotNullWhen(true)] out System.Collections.IEnumerable? entityArray)
+    {
+        entityArray = null;
+
+        PropertyInfo? dataEntitiesProp = importCommonType.GetProperty(
+            "dataEntities", BindingFlags.Public | BindingFlags.Static);
+        object? dataEntities = dataEntitiesProp?.GetValue(null);
+        if (dataEntities is null)
+        {
+            logger.LogInformation("LookupKeys pre-population skipped — dataEntities not loaded yet");
+            return false;
+        }
+
+        PropertyInfo? entityArrayProp = dataEntities.GetType().GetProperty("entity");
+        entityArray = entityArrayProp?.GetValue(dataEntities) as System.Collections.IEnumerable;
+        if (entityArray is null)
+        {
+            // Logged at the same level as the other skip reasons so that a
+            // skipped run is never silent at the default log level.
+            logger.LogInformation("LookupKeys pre-population skipped — entity array not found");
+            return false;
+        }
+
+        return true;
+    }
+
+    /// <summary>
+    /// Walks every entity in the package and applies both pre-population
+    /// strategies to its records.
+    /// </summary>
+    /// <returns>
+    /// Total pre-seeded records and the number of entities that contributed at
+    /// least one of them. Entities without a name, without a records collection,
+    /// or without any usable record are not counted.
+    /// </returns>
+    private static Result PrepopulateRecords(
+        System.Collections.IEnumerable entityArray,
+        object lookupKeys,
+        MethodInfo tryAdd)
+    {
+        int recordCount = 0;
+        int entityCount = 0;
+
+        foreach (object? entity in entityArray)
+        {
+            if (entity is null) continue;
+
+            Type entityType = entity.GetType();
+
+            if (entityType.GetProperty("name")?.GetValue(entity) is not string entityName
+                || entityName.Length == 0)
+            {
+                continue;
+            }
+
+            if (entityType.GetProperty("records")?.GetValue(entity)
+                is not System.Collections.IEnumerable records)
+            {
+                continue;
+            }
+
+            int seeded = PrepopulateEntityRecords(entityName, records, lookupKeys, tryAdd);
+            if (seeded == 0) continue;
+
+            recordCount += seeded;
+            entityCount++;
+        }
+
+        return new Result(recordCount, entityCount);
+    }
+
+    /// <summary>
+    /// Pre-seeds every record of one entity: adds <c>"{entityName}:{id}" → Guid</c>
+    /// to LookupKeys and, where <c>newId</c> is blank and writable, copies
+    /// <c>id</c> into it.
+    /// </summary>
+    /// <returns>Number of records whose <c>id</c> was a non-empty, parseable GUID.</returns>
+    private static int PrepopulateEntityRecords(
+        string entityName,
+        System.Collections.IEnumerable records,
+        object lookupKeys,
+        MethodInfo tryAdd)
+    {
+        int count = 0;
+        Type? cachedRecordType = null;
+        PropertyInfo? idProp = null;
+        PropertyInfo? newIdProp = null;
+
+        foreach (object? record in records)
+        {
+            if (record is null) continue;
+
+            // Resolve accessors once per runtime type. Keying the cache on the type
+            // (rather than on "idProp is null") avoids repeating the reflection lookup
+            // for every record when the property is absent, and avoids invoking a
+            // PropertyInfo against a record of a different type.
+            Type recordType = record.GetType();
+            if (recordType != cachedRecordType)
+            {
+                cachedRecordType = recordType;
+                idProp = recordType.GetProperty("id");
+                newIdProp = recordType.GetProperty("newId");
+            }
+
+            if (idProp?.GetValue(record) is not string id || id.Length == 0) continue;
+            if (!Guid.TryParse(id, out Guid guid)) continue;
+
+            // Strategy A: pre-seed LookupKeys so FindEntity() can short-circuit at
+            // its cache check (the original author's notes place it at line 2851
+            // of ImportCrmEntityActions.cs). TryAdd leaves an existing entry
+            // untouched, so repeated runs are harmless.
+            string key = string.Concat(entityName, ":", id);
+            tryAdd.Invoke(lookupKeys, [key, guid]);
+
+            // Strategy B: set newId = id on the record so that, even if the
+            // LookupKeys check misses, FindEntity() finds a non-empty newId and
+            // returns without a server round-trip. This relies on CMT preserving
+            // source GUIDs (UpsertRequest.Target.Id = source GUID), as described
+            // in the class remarks. A value that is already set is never overwritten.
+            if (newIdProp is { CanWrite: true }
+                && string.IsNullOrWhiteSpace(newIdProp.GetValue(record) as string))
+            {
+                newIdProp.SetValue(record, id);
+            }
+
+            count++;
+        }
+
+        return count;
+    }
+}