Skip to content

Commit 4e2d25d

Browse files
committed
Attempt to add morph-tokens to headwords
1 parent bc05534 commit 4e2d25d

29 files changed

Lines changed: 317 additions & 77 deletions

File tree

backend/FwLite/FwDataMiniLcmBridge/Api/FwDataMiniLcmApi.cs

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -652,7 +652,7 @@ private Entry FromLexEntry(ILexEntry entry)
652652
{
653653
try
654654
{
655-
return new Entry
655+
var result = new Entry
656656
{
657657
Id = entry.Guid,
658658
Note = FromLcmMultiString(entry.Comment),
@@ -670,6 +670,8 @@ private Entry FromLexEntry(ILexEntry entry)
670670
// ILexEntry.PublishIn is a virtual property that inverts DoNotPublishInRC against all publications
671671
PublishIn = entry.PublishIn.Select(FromLcmPossibility).ToList(),
672672
};
673+
result.Headword = ComputeHeadword(result, entry.PrimaryMorphType);
674+
return result;
673675
}
674676
catch (Exception e)
675677
{
@@ -678,6 +680,35 @@ private Entry FromLexEntry(ILexEntry entry)
678680
}
679681
}
680682

683+
/// <summary>
/// Builds the per-writing-system headword for an already converted entry.
/// A non-empty citation form is used as-is (trimmed); otherwise a non-empty lexeme form
/// is wrapped in the morph type's Prefix/Postfix tokens and trimmed.
/// </summary>
/// <param name="result">Entry whose CitationForm and LexemeForm are read.</param>
/// <param name="lcmMorphType">Morph type supplying the tokens; when null no tokens are added.</param>
/// <returns>A new MultiString holding one headword per writing system that produced one.</returns>
private static MultiString ComputeHeadword(Entry result, IMoMorphType? lcmMorphType)
{
    var headwords = new MultiString();
    var prefixToken = lcmMorphType?.Prefix ?? "";
    var postfixToken = lcmMorphType?.Postfix ?? "";

    // Every writing system that has a key in either form is considered, not only the
    // "current" vernacular ones, so headwords for non-current or future writing systems
    // are not lost.
    var writingSystems = result.CitationForm.Values.Keys.Union(result.LexemeForm.Values.Keys);
    foreach (var ws in writingSystems)
    {
        var citationForm = result.CitationForm[ws];
        if (string.IsNullOrEmpty(citationForm))
        {
            // No citation form: fall back to the token-decorated lexeme form, if any.
            var lexemeForm = result.LexemeForm[ws];
            if (!string.IsNullOrEmpty(lexemeForm))
            {
                headwords[ws] = (prefixToken + lexemeForm + postfixToken).Trim();
            }
        }
        else
        {
            // The citation form wins and is never decorated with morph tokens.
            headwords[ws] = citationForm.Trim();
        }
    }

    return headwords;
}
711+
681712
private List<ComplexFormType> ToComplexFormTypes(ILexEntry entry)
682713
{
683714
return entry.ComplexFormEntryRefs

backend/FwLite/FwLiteProjectSync.Tests/EntrySyncTests.cs

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ public async Task CanSyncRandomEntries(ApiType? roundTripApiType)
159159
.Select(createdCfc =>
160160
{
161161
var copy = createdCfc.Copy();
162-
copy.ComponentHeadword = after.Headword();
162+
copy.ComponentHeadword = after.HeadwordText();
163163
return copy;
164164
}),
165165
// keep new
@@ -175,7 +175,7 @@ public async Task CanSyncRandomEntries(ApiType? roundTripApiType)
175175
.Select(createdCfc =>
176176
{
177177
var copy = createdCfc.Copy();
178-
copy.ComplexFormHeadword = after.Headword();
178+
copy.ComplexFormHeadword = after.HeadwordText();
179179
return copy;
180180
}),
181181
// keep new
@@ -210,19 +210,18 @@ public async Task CanSyncRandomEntries(ApiType? roundTripApiType)
210210
{
211211
options = options
212212
.WithStrictOrdering()
213+
.Excluding(e => e.Headword) // Computed property, populated by Finalize
213214
.WithoutStrictOrderingFor(e => e.ComplexForms) // sorted alphabetically
214215
.WithoutStrictOrderingFor(e => e.Path.EndsWith($".{nameof(Sense.SemanticDomains)}")) // not sorted
215216
.For(e => e.Senses).Exclude(s => s.Order)
216217
.For(e => e.Components).Exclude(c => c.Order)
217218
.For(e => e.ComplexForms).Exclude(c => c.Order)
218219
.For(e => e.Senses).For(s => s.ExampleSentences).Exclude(e => e.Order);
219-
if (currentApiType == ApiType.Crdt)
220-
{
221-
// does not yet update Headwords 😕
222-
options = options
223-
.For(e => e.Components).Exclude(c => c.ComplexFormHeadword)
224-
.For(e => e.ComplexForms).Exclude(c => c.ComponentHeadword);
225-
}
220+
// ComplexFormHeadword/ComponentHeadword are computed from HeadwordText() which
221+
// depends on the computed Headword property — exclude since AutoFaker randomizes it.
222+
options = options
223+
.For(e => e.Components).Exclude(c => c.ComplexFormHeadword)
224+
.For(e => e.ComplexForms).Exclude(c => c.ComponentHeadword);
226225
if (currentApiType == ApiType.FwData)
227226
{
228227
// does not support changing MorphType yet (see UpdateEntryProxy.MorphType)
@@ -280,15 +279,15 @@ public async Task CanChangeComplexFormViaSync_Components()
280279
new ComplexFormComponent()
281280
{
282281
ComponentEntryId = component1.Id,
283-
ComponentHeadword = component1.Headword(),
282+
ComponentHeadword = component1.HeadwordText(),
284283
ComplexFormEntryId = complexFormId,
285284
ComplexFormHeadword = "complex form"
286285
}
287286
]
288287
});
289288
var complexFormAfter = complexForm.Copy();
290289
complexFormAfter.Components[0].ComponentEntryId = component2.Id;
291-
complexFormAfter.Components[0].ComponentHeadword = component2.Headword();
290+
complexFormAfter.Components[0].ComponentHeadword = component2.HeadwordText();
292291

293292
await EntrySync.SyncFull(complexForm, complexFormAfter, Api);
294293

@@ -315,13 +314,13 @@ public async Task CanChangeComplexFormViaSync_ComplexForms()
315314
ComponentEntryId = componentId,
316315
ComponentHeadword = "component",
317316
ComplexFormEntryId = complexForm1.Id,
318-
ComplexFormHeadword = complexForm1.Headword()
317+
ComplexFormHeadword = complexForm1.HeadwordText()
319318
}
320319
]
321320
});
322321
var componentAter = component.Copy();
323322
componentAter.ComplexForms[0].ComplexFormEntryId = complexForm2.Id;
324-
componentAter.ComplexForms[0].ComplexFormHeadword = complexForm2.Headword();
323+
componentAter.ComplexForms[0].ComplexFormHeadword = complexForm2.HeadwordText();
325324

326325
await EntrySync.SyncFull(component, componentAter, Api);
327326

@@ -413,11 +412,13 @@ public async Task CanSyncNewEntryReferencedByExistingEntry()
413412
// assert
414413
var actualExistingEntry = await Api.GetEntry(existingEntryAfter.Id);
415414
actualExistingEntry.Should().BeEquivalentTo(existingEntryAfter, options => options
415+
.Excluding(e => e.Headword)
416416
.For(e => e.ComplexForms).Exclude(c => c.Id)
417417
.For(e => e.ComplexForms).Exclude(c => c.Order));
418418

419419
var actualNewEntry = await Api.GetEntry(newEntry.Id);
420420
actualNewEntry.Should().BeEquivalentTo(newEntry, options => options
421+
.Excluding(e => e.Headword)
421422
.Excluding(e => e.ComplexFormTypes) // LibLcm automatically creates a complex form type. Should we?
422423
.For(e => e.Components).Exclude(c => c.Id)
423424
.For(e => e.Components).Exclude(c => c.Order));
@@ -487,13 +488,13 @@ public async Task SyncWithoutComplexFormsAndComponents_CorrectlySyncsUpdatedEntr
487488
// assert
488489
var actualComponent = await Api.GetEntry(componentAfter.Id);
489490
actualComponent.Should().BeEquivalentTo(componentAfter,
490-
options => options.Excluding(e => e.ComplexForms));
491+
options => options.Excluding(e => e.ComplexForms).Excluding(e => e.Headword));
491492
actualComponent.ComplexForms.Should().BeEmpty();
492493

493494
var actualComplexForm = await Api.GetEntry(complexForm.Id);
494495
addedComplexForm.Should().BeEquivalentTo(actualComplexForm);
495496
actualComplexForm.Should().BeEquivalentTo(complexForm,
496-
options => options.Excluding(e => e.Components));
497+
options => options.Excluding(e => e.Components).Excluding(e => e.Headword));
497498
actualComplexForm.Components.Should().BeEmpty();
498499
}
499500

@@ -526,13 +527,13 @@ public async Task SyncWithoutComplexFormsAndComponents_CorrectlySyncsAddedEntrie
526527
var actualComponent = await Api.GetEntry(component.Id);
527528
addedComponent.Should().BeEquivalentTo(actualComponent);
528529
actualComponent.Should().BeEquivalentTo(component,
529-
options => options.Excluding(e => e.ComplexForms));
530+
options => options.Excluding(e => e.ComplexForms).Excluding(e => e.Headword));
530531
actualComponent.ComplexForms.Should().BeEmpty();
531532

532533
var actualComplexForm = await Api.GetEntry(complexForm.Id);
533534
addedComplexForm.Should().BeEquivalentTo(actualComplexForm);
534535
actualComplexForm.Should().BeEquivalentTo(complexForm,
535-
options => options.Excluding(e => e.Components));
536+
options => options.Excluding(e => e.Components).Excluding(e => e.Headword));
536537
actualComplexForm.Components.Should().BeEmpty();
537538
}
538539

@@ -636,15 +637,15 @@ public async Task SyncComplexFormsAndComponents_MovesComponentsToCorrectPosition
636637
new ComplexFormComponent()
637638
{
638639
ComponentEntryId = componentA.Id,
639-
ComponentHeadword = componentA.Headword(),
640+
ComponentHeadword = componentA.HeadwordText(),
640641
ComplexFormEntryId = complexFormId,
641642
ComplexFormHeadword = "complex form",
642643
Order = 1
643644
},
644645
new ComplexFormComponent()
645646
{
646647
ComponentEntryId = componentB.Id,
647-
ComponentHeadword = componentB.Headword(),
648+
ComponentHeadword = componentB.HeadwordText(),
648649
ComplexFormEntryId = complexFormId,
649650
ComplexFormHeadword = "complex form",
650651
Order = 2

backend/FwLite/FwLiteProjectSync.Tests/SyncTests.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ public SyncTests(SyncFixture fixture)
8585
internal static EquivalencyOptions<Entry> SyncExclusions(EquivalencyOptions<Entry> options)
8686
{
8787
options = options
88+
.Excluding(e => e.Headword) // Computed property, populated by Finalize
8889
.For(e => e.Senses).Exclude(s => s.Order)
8990
.For(e => e.Senses).For(s => s.ExampleSentences).Exclude(s => s.Order)
9091
.For(e => e.Components).Exclude(c => c.Id)

backend/FwLite/FwLiteProjectSync.Tests/UpdateDiffTests.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ public void EntryDiffShouldUpdateAllFields()
2525
.Excluding(x => x.Components)
2626
.Excluding(x => x.ComplexForms)
2727
.Excluding(x => x.ComplexFormTypes)
28-
.Excluding(x => x.PublishIn);
28+
.Excluding(x => x.PublishIn)
29+
.Excluding(x => x.Headword); // Computed property, not synced
2930
});
3031
}
3132

backend/FwLite/FwLiteProjectSync/DryRunMiniLcmApi.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ public Task DeleteMorphTypeData(Guid id)
154154
public Task<Entry> CreateEntry(Entry entry, CreateEntryOptions? options)
155155
{
156156
options ??= new CreateEntryOptions();
157-
DryRunRecords.Add(new DryRunRecord(nameof(CreateEntry), $"Create entry {entry.Headword()} ({options})"));
157+
DryRunRecords.Add(new DryRunRecord(nameof(CreateEntry), $"Create entry {entry.HeadwordText()} ({options})"));
158158
// Only return what would have been persisted
159159
if (options.IncludeComplexFormsAndComponents)
160160
return Task.FromResult(entry);

backend/FwLite/LcmCrdt/Changes/Entries/AddEntryComponentChange.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,9 @@ public override async ValueTask<ComplexFormComponent> NewEntity(Commit commit, I
4949
Id = EntityId,
5050
Order = Order,
5151
ComplexFormEntryId = ComplexFormEntryId,
52-
ComplexFormHeadword = complexFormEntry?.Headword(),
52+
ComplexFormHeadword = complexFormEntry?.HeadwordText(),
5353
ComponentEntryId = ComponentEntryId,
54-
ComponentHeadword = componentEntry?.Headword(),
54+
ComponentHeadword = componentEntry?.HeadwordText(),
5555
ComponentSenseId = ComponentSenseId,
5656
DeletedAt = shouldBeDeleted
5757
? commit.DateTime

backend/FwLite/LcmCrdt/Changes/Entries/SetComplexFormComponentChange.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,14 @@ public override async ValueTask ApplyChange(ComplexFormComponent entity, IChange
2828
{
2929
entity.ComplexFormEntryId = ComplexFormEntryId.Value;
3030
var complexFormEntry = await context.GetCurrent<Entry>(ComplexFormEntryId.Value);
31-
entity.ComplexFormHeadword = complexFormEntry?.Headword();
31+
entity.ComplexFormHeadword = complexFormEntry?.HeadwordText();
3232
entity.DeletedAt = complexFormEntry?.DeletedAt != null ? context.Commit.DateTime : (DateTime?)null;
3333
}
3434
if (ComponentEntryId.HasValue)
3535
{
3636
entity.ComponentEntryId = ComponentEntryId.Value;
3737
var componentEntry = await context.GetCurrent<Entry>(ComponentEntryId.Value);
38-
entity.ComponentHeadword = componentEntry?.Headword();
38+
entity.ComponentHeadword = componentEntry?.HeadwordText();
3939
entity.DeletedAt = componentEntry?.DeletedAt != null ? context.Commit.DateTime : (DateTime?)null;
4040
}
4141
entity.ComponentSenseId = ComponentSenseId;

backend/FwLite/LcmCrdt/Data/EntryQueryHelpers.cs

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,87 @@ public static string Headword(this Entry e, WritingSystemId ws)
1717
(e, ws) => (string.IsNullOrEmpty(Json.Value(e.CitationForm, ms => ms[ws]))
1818
? Json.Value(e.LexemeForm, ms => ms[ws])
1919
: Json.Value(e.CitationForm, ms => ms[ws]))!.Trim();
20+
21+
/// <summary>
/// Returns the headword of <paramref name="e"/> for one writing system: the trimmed
/// citation form when it is non-empty, otherwise the lexeme form surrounded by the given
/// tokens and trimmed, otherwise an empty string.
/// </summary>
/// <param name="e">Entry whose CitationForm and LexemeForm are read.</param>
/// <param name="ws">Writing system to look up in both forms.</param>
/// <param name="leading">Token placed before the lexeme form; null counts as empty.</param>
/// <param name="trailing">Token placed after the lexeme form; null counts as empty.</param>
[ExpressionMethod(nameof(HeadwordWithTokensExpression))]
public static string HeadwordWithTokens(this Entry e, WritingSystemId ws, string? leading, string? trailing)
{
    var citationForm = e.CitationForm[ws];
    if (!string.IsNullOrEmpty(citationForm))
    {
        // The citation form overrides the lexeme form and is never decorated.
        return citationForm.Trim();
    }

    var lexemeForm = e.LexemeForm[ws];
    return string.IsNullOrEmpty(lexemeForm)
        ? string.Empty
        // string.Concat treats null arguments as empty strings.
        : string.Concat(leading, lexemeForm, trailing).Trim();
}
30+
31+
/// <summary>
/// Expression-tree form of <see cref="HeadwordWithTokens"/>; it is the member named by that
/// method's <c>[ExpressionMethod]</c> attribute. It applies the same rule: citation form
/// first, then the token-decorated lexeme form, then an empty string.
/// </summary>
// NOTE(review): presumably the query provider substitutes this expression for the method
// call when building SQL; confirm against the ExpressionMethod documentation.
private static Expression<Func<Entry, WritingSystemId, string?, string?, string?>> HeadwordWithTokensExpression()
{
    return (entry, wsId, prefix, suffix) =>
        string.IsNullOrEmpty(Json.Value(entry.CitationForm, forms => forms[wsId]))
            ? (string.IsNullOrEmpty(Json.Value(entry.LexemeForm, forms => forms[wsId]))
                ? ""
                : ((prefix ?? "") + Json.Value(entry.LexemeForm, forms => forms[wsId]) + (suffix ?? "")).Trim())
            : Json.Value(entry.CitationForm, forms => forms[wsId])!.Trim();
}
38+
39+
/// <summary>
/// Checks whether any CitationForm value, or any LexemeForm value decorated with the given
/// morph tokens, matches the query.
/// Used with a MorphTypeData JOIN in EntrySearchService.FilterInternal.
/// <para>
/// Known edge case that this check on its own does not handle correctly:
/// given lexeme "ing", citation "ing" and morph type Suffix, the headword is just "ing"
/// (because the citation form overrides the morph-type tokens), yet this check would still
/// match the query "-ing". However, the FTS will NOT match it, so the edge case is
/// actually covered.
/// </para>
/// </summary>
/// <param name="e">Entry whose citation and lexeme forms are searched.</param>
/// <param name="leading">Token prepended to every lexeme form; null counts as empty.</param>
/// <param name="trailing">Token appended to every lexeme form; null counts as empty.</param>
/// <param name="query">Text to search for.</param>
/// <returns>
/// True when the citation form matches via SearchValue, or when any decorated lexeme form
/// matches via SqlHelpers.ContainsIgnoreCaseAccents; otherwise false.
/// </returns>
[ExpressionMethod(nameof(SearchHeadwordCandidatesExpression))]
public static bool SearchHeadwordCandidates(this Entry e, string? leading, string? trailing, string query)
{
    if (e.CitationForm.SearchValue(query))
    {
        return true;
    }

    // The tokens are identical for every lexeme form, so normalize null to "" once.
    var prefix = leading ?? "";
    var suffix = trailing ?? "";
    return e.LexemeForm.Values.Any(kvp =>
        SqlHelpers.ContainsIgnoreCaseAccents(prefix + kvp.Value + suffix, query));
}
56+
57+
/// <summary>
/// Expression-tree form of <see cref="SearchHeadwordCandidates"/>; it is the member named
/// by that method's <c>[ExpressionMethod]</c> attribute. It is true when any citation form
/// value, or any lexeme form value wrapped in the given tokens, satisfies
/// SqlHelpers.ContainsIgnoreCaseAccents for the query.
/// </summary>
private static Expression<Func<Entry, string?, string?, string, bool>> SearchHeadwordCandidatesExpression() =>
    (entry, prefix, suffix, searchText) =>
        Json.QueryValues(entry.CitationForm).Any(
            citation => SqlHelpers.ContainsIgnoreCaseAccents(citation, searchText)) ||
        Json.QueryValues(entry.LexemeForm).Any(
            lexeme => SqlHelpers.ContainsIgnoreCaseAccents((prefix ?? "") + lexeme + (suffix ?? ""), searchText));
65+
66+
67+
/// <summary>
/// Computes the headword for every writing system that has a key in CitationForm or
/// LexemeForm. A non-empty citation form is used trimmed and undecorated; otherwise a
/// non-empty lexeme form is wrapped in the morph type's leading/trailing tokens and trimmed.
/// Used for in-memory population of Entry.Headword after loading from DB.
/// </summary>
/// <param name="entry">Entry whose forms and MorphType are read.</param>
/// <param name="morphTypeDataLookup">
/// Morph type data keyed by morph type; when it has no item for the entry's morph type,
/// no tokens are applied.
/// </param>
/// <returns>A new MultiString holding one headword per writing system that produced one.</returns>
public static MultiString ComputeHeadwords(Entry entry,
    IReadOnlyDictionary<MorphType, MorphTypeData> morphTypeDataLookup)
{
    var headwords = new MultiString();
    // A failed lookup is fine: morphTypeData stays null and the tokens default to "".
    morphTypeDataLookup.TryGetValue(entry.MorphType, out var morphTypeData);

    // Every writing system with data is visited, not just the "current" vernacular ones,
    // so headwords for non-current or future writing systems are not lost.
    var writingSystems = entry.CitationForm.Values.Keys.Union(entry.LexemeForm.Values.Keys);
    foreach (var ws in writingSystems)
    {
        var citationForm = entry.CitationForm[ws];
        if (string.IsNullOrEmpty(citationForm))
        {
            var lexemeForm = entry.LexemeForm[ws];
            if (string.IsNullOrEmpty(lexemeForm))
            {
                // Neither form has text for this writing system: no headword.
                continue;
            }

            var leadingToken = morphTypeData?.LeadingToken ?? "";
            var trailingToken = morphTypeData?.TrailingToken ?? "";
            headwords[ws] = (leadingToken + lexemeForm + trailingToken).Trim();
        }
        else
        {
            // The citation form overrides the lexeme form and is never decorated.
            headwords[ws] = citationForm.Trim();
        }
    }

    return headwords;
}
20103
}

backend/FwLite/LcmCrdt/Data/Filtering.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ public static IQueryable<Entry> WhereExemplar(
1515
return query.Where(e => e.Headword(ws).StartsWith(exemplar));
1616
}
1717

18+
// Note: morph-token-decorated headword matching is handled at the FTS/JOIN level
19+
// (see EntrySearchService.FilterInternal). This simpler fallback only checks base forms.
1820
public static Expression<Func<Entry, bool>> SearchFilter(string query)
1921
{
2022
return e => e.LexemeForm.SearchValue(query)
@@ -42,11 +44,13 @@ public static Func<Entry, bool> CompiledFilter(string? query, WritingSystemId ws
4244
(null, null) => _ => true,
4345
(not null, null) => e => e.LexemeForm.SearchValue(query)
4446
|| e.CitationForm.SearchValue(query)
47+
|| e.Headword.SearchValue(query)
4548
|| e.Senses.Any(s => s.Gloss.SearchValue(query)),
4649
(null, not null) => e => e.Headword(ws).StartsWith(exemplar),
4750
(_, _) => e => e.Headword(ws).StartsWith(exemplar)
4851
&& (e.LexemeForm.SearchValue(query)
4952
|| e.CitationForm.SearchValue(query)
53+
|| e.Headword.SearchValue(query)
5054
|| e.Senses.Any(s => s.Gloss.SearchValue(query)))
5155
};
5256
}

0 commit comments

Comments
 (0)