diff --git a/backend/FwLite/FwDataMiniLcmBridge.Tests/MiniLcmTests/SortingTests.cs b/backend/FwLite/FwDataMiniLcmBridge.Tests/MiniLcmTests/SortingTests.cs index fb6138408b..ce3eb0cc61 100644 --- a/backend/FwLite/FwDataMiniLcmBridge.Tests/MiniLcmTests/SortingTests.cs +++ b/backend/FwLite/FwDataMiniLcmBridge.Tests/MiniLcmTests/SortingTests.cs @@ -1,4 +1,7 @@ +using FwDataMiniLcmBridge.Api; +using FwDataMiniLcmBridge.LcmUtils; using FwDataMiniLcmBridge.Tests.Fixtures; +using MiniLcm.Models; namespace FwDataMiniLcmBridge.Tests.MiniLcmTests; @@ -9,4 +12,42 @@ protected override Task NewApi() { return Task.FromResult(fixture.NewProjectApi("sorting-test", "en", "en")); } + + [Theory] + [InlineData("aaaa", SortField.Headword)] // FTS + [InlineData("a", SortField.Headword)] // non-FTS + [InlineData("aaaa", SortField.SearchRelevance)] // FTS + [InlineData("a", SortField.SearchRelevance)] // non-FTS + public async Task SecondaryOrder_DefaultsToStem(string query, SortField sortField) + { + var unknownMorphTypeEntryId = Guid.NewGuid(); + Entry[] expected = [ + new() { Id = unknownMorphTypeEntryId, LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.Unknown }, // SecondaryOrder defaults to Stem = 1 + new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.BoundStem }, // SecondaryOrder = 2 + new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.Suffix }, // SecondaryOrder = 6 + ]; + + var ids = expected.Select(e => e.Id).ToHashSet(); + + foreach (var entry in Faker.Faker.Random.Shuffle(expected)) + await Api.CreateEntry(entry); + + var fwDataApi = (BaseApi as FwDataMiniLcmApi)!; + await fwDataApi.Cache.DoUsingNewOrCurrentUOW("Clear morph type", + "Revert morph type", + () => + { + // the fwdata api doesn't allow creating entries with MorphType.Other or Unknown, so we force it + var unknownMorphTypeEntry = fwDataApi.EntriesRepository.GetObject(unknownMorphTypeEntryId); + unknownMorphTypeEntry.LexemeFormOA.MorphTypeRA = null; + return ValueTask.CompletedTask; + }); + + var results = (await Api.SearchEntries(query, new(new(sortField))).ToArrayAsync()) + .Where(e => ids.Contains(e.Id)) + .ToList(); + + results.Should().BeEquivalentTo(expected, + options => options.WithStrictOrdering()); + } } diff --git a/backend/FwLite/FwDataMiniLcmBridge/Api/FwDataMiniLcmApi.cs b/backend/FwLite/FwDataMiniLcmBridge/Api/FwDataMiniLcmApi.cs index 82f7c8c7f0..c6f9dec4cb 100644 --- a/backend/FwLite/FwDataMiniLcmBridge/Api/FwDataMiniLcmApi.cs +++ b/backend/FwLite/FwDataMiniLcmBridge/Api/FwDataMiniLcmApi.cs @@ -652,7 +652,7 @@ private Entry FromLexEntry(ILexEntry entry) { try { - return new Entry + var result = new Entry { Id = entry.Guid, Note = FromLcmMultiString(entry.Comment), @@ -670,6 +670,7 @@ private Entry FromLexEntry(ILexEntry entry) // ILexEntry.PublishIn is a virtual property that inverts DoNotPublishInRC against all publications PublishIn = entry.PublishIn.Select(FromLcmPossibility).ToList(), }; + return result; } catch (Exception e) { @@ -725,24 +726,22 @@ private ComplexFormComponent ToEntryReference(ILexEntry component, ILexEntry com return new ComplexFormComponent { ComponentEntryId = component.Guid, - ComponentHeadword = component.LexEntryHeadwordOrUnknown(), + ComponentHeadword = component.LexEntryHeadwordOrUnknown(applyMorphTokens: false), // match CRDT for now ComplexFormEntryId = complexEntry.Guid, - ComplexFormHeadword = complexEntry.LexEntryHeadwordOrUnknown(), + ComplexFormHeadword = complexEntry.LexEntryHeadwordOrUnknown(applyMorphTokens: false), // match CRDT for now Order = Order(component, complexEntry) }; } - - private ComplexFormComponent ToSenseReference(ILexSense componentSense, ILexEntry complexEntry) { return new ComplexFormComponent { ComponentEntryId = componentSense.Entry.Guid, ComponentSenseId = componentSense.Guid, - ComponentHeadword = componentSense.Entry.LexEntryHeadwordOrUnknown(), + ComponentHeadword = componentSense.Entry.LexEntryHeadwordOrUnknown(applyMorphTokens: false), // match CRDT for now ComplexFormEntryId = complexEntry.Guid, - ComplexFormHeadword = complexEntry.LexEntryHeadwordOrUnknown(), + ComplexFormHeadword = complexEntry.LexEntryHeadwordOrUnknown(applyMorphTokens: false), // match CRDT for now Order = Order(componentSense, complexEntry) }; } @@ -939,12 +938,13 @@ private IEnumerable GetFilteredAndSortedEntries(Func private IEnumerable ApplySorting(SortOptions order, IEnumerable entries, string? query) { var sortWs = GetWritingSystemHandle(order.WritingSystem, WritingSystemType.Vernacular); + var stemSecondaryOrder = MorphTypeRepository.GetObject(MoMorphTypeTags.kguidMorphStem).SecondaryOrder; if (order.Field == SortField.SearchRelevance) { - return entries.ApplyRoughBestMatchOrder(order, sortWs, query); + return entries.ApplyRoughBestMatchOrder(order, sortWs, stemSecondaryOrder, query); } - return order.ApplyOrder(entries, e => e.LexEntryHeadword(sortWs)); + return entries.ApplyHeadwordOrder(order, sortWs, stemSecondaryOrder); } public IAsyncEnumerable SearchEntries(string query, QueryOptions? options = null) @@ -956,7 +956,7 @@ public IAsyncEnumerable SearchEntries(string query, QueryOptions? options private Func? EntrySearchPredicate(string? query = null) { if (string.IsNullOrEmpty(query)) return null; - return entry => entry.CitationForm.SearchValue(query) || + return entry => entry.SearchHeadWord(query) || // CitationForm.SearchValue would be redundant entry.LexemeFormOA?.Form.SearchValue(query) is true || entry.AllSenses.Any(s => s.Gloss.SearchValue(query)); } diff --git a/backend/FwLite/FwDataMiniLcmBridge/Api/LcmHelpers.cs b/backend/FwLite/FwDataMiniLcmBridge/Api/LcmHelpers.cs index d3b296cf8e..f5793651ef 100644 --- a/backend/FwLite/FwDataMiniLcmBridge/Api/LcmHelpers.cs +++ b/backend/FwLite/FwDataMiniLcmBridge/Api/LcmHelpers.cs @@ -11,7 +11,7 @@ namespace FwDataMiniLcmBridge.Api; internal static class LcmHelpers { - internal static string? LexEntryHeadword(this ILexEntry entry, int? ws = null) + internal static string? LexEntryHeadword(this ILexEntry entry, int? ws = null, bool applyMorphTokens = true) { var citationFormTs = ws.HasValue ? entry.CitationForm.get_String(ws.Value) @@ -27,15 +27,33 @@ internal static class LcmHelpers : null; var lexemeForm = lexemeFormTs?.Text?.Trim(WhitespaceChars); - return lexemeForm; + if (string.IsNullOrEmpty(lexemeForm) || !applyMorphTokens) return lexemeForm; + + var morphType = entry.LexemeFormOA?.MorphTypeRA; + var leading = morphType?.Prefix ?? ""; + var trailing = morphType?.Postfix ?? ""; + return (leading + lexemeForm + trailing).Trim(WhitespaceChars); } - internal static string LexEntryHeadwordOrUnknown(this ILexEntry entry, int? ws = null) + internal static string LexEntryHeadwordOrUnknown(this ILexEntry entry, int? ws = null, bool applyMorphTokens = true) { - var headword = entry.LexEntryHeadword(ws); + var headword = entry.LexEntryHeadword(ws, applyMorphTokens); return string.IsNullOrEmpty(headword) ? Entry.UnknownHeadword : headword; } + internal static bool SearchHeadWord(this ILexEntry entry, string value) + { + foreach (var ws in entry.Cache.ServiceLocator.WritingSystems.VernacularWritingSystems) + { + var headword = entry.HeadWordForWs(ws.Handle); + if (headword is null) continue; + var text = headword.Text; + if (string.IsNullOrEmpty(text)) continue; + if (text.ContainsDiacriticMatch(value)) return true; + } + return false; + } + internal static bool SearchValue(this ITsMultiString multiString, string value) { for (var i = 0; i < multiString.StringCount; i++) diff --git a/backend/FwLite/FwDataMiniLcmBridge/Api/Sorting.cs b/backend/FwLite/FwDataMiniLcmBridge/Api/Sorting.cs index 82b87386f8..d021181e24 100644 --- a/backend/FwLite/FwDataMiniLcmBridge/Api/Sorting.cs +++ b/backend/FwLite/FwDataMiniLcmBridge/Api/Sorting.cs @@ -6,31 +6,55 @@ namespace FwDataMiniLcmBridge.Api; internal static class Sorting { + public static IEnumerable ApplyHeadwordOrder(this IEnumerable entries, SortOptions order, int sortWsHandle, int stemSecondaryOrder) + { + if (order.Ascending) + { + return entries + .OrderBy(e => e.LexEntryHeadword(sortWsHandle, applyMorphTokens: false)) + .ThenBy(e => e.PrimaryMorphType?.SecondaryOrder ?? stemSecondaryOrder) + // .ThenBy(e => e.HomographNumber) + .ThenBy(e => e.Id.Guid); + } + else + { + return entries + .OrderByDescending(e => e.LexEntryHeadword(sortWsHandle, applyMorphTokens: false)) + .ThenByDescending(e => e.PrimaryMorphType?.SecondaryOrder ?? stemSecondaryOrder) + // .ThenByDescending(e => e.HomographNumber) + .ThenByDescending(e => e.Id.Guid); + } + } + /// /// Rough emulation of FTS search relevance. Headword matches come first, preferring /// prefix matches (e.g. when searching "tan" then "tanan" is before "matan"), then shorter, then alphabetical. /// See also: EntrySearchService.FilterAndRank for the FTS-based equivalent in LcmCrdt. /// - public static IEnumerable ApplyRoughBestMatchOrder(this IEnumerable entries, SortOptions order, int sortWsHandle, string? query = null) + public static IEnumerable ApplyRoughBestMatchOrder(this IEnumerable entries, SortOptions order, int sortWsHandle, int stemSecondaryOrder, string? query = null) { - var projected = entries.Select(e => (Entry: e, Headword: e.LexEntryHeadword(sortWsHandle))); + var projected = entries.Select(e => (Entry: e, Headword: e.LexEntryHeadword(sortWsHandle, applyMorphTokens: false))); if (order.Ascending) { return projected - .OrderByDescending(x => !string.IsNullOrEmpty(query) && (x.Headword?.ContainsDiacriticMatch(query!) ?? false)) - .ThenByDescending(x => !string.IsNullOrEmpty(query) && (x.Headword?.StartsWithDiacriticMatch(query!) ?? false)) + .OrderByDescending(x => !string.IsNullOrEmpty(query) && (x.Headword?.StartsWithDiacriticMatch(query!) ?? false)) + .ThenByDescending(x => !string.IsNullOrEmpty(query) && (x.Headword?.ContainsDiacriticMatch(query!) ?? false)) .ThenBy(x => x.Headword?.Length ?? 0) .ThenBy(x => x.Headword) + .ThenBy(x => x.Entry.PrimaryMorphType?.SecondaryOrder ?? stemSecondaryOrder) + // .ThenBy(x => x.Entry.HomographNumber) .ThenBy(x => x.Entry.Id.Guid) .Select(x => x.Entry); } else { return projected - .OrderBy(x => !string.IsNullOrEmpty(query) && (x.Headword?.ContainsDiacriticMatch(query!) ?? false)) - .ThenBy(x => !string.IsNullOrEmpty(query) && (x.Headword?.StartsWithDiacriticMatch(query!) ?? false)) + .OrderBy(x => !string.IsNullOrEmpty(query) && (x.Headword?.StartsWithDiacriticMatch(query!) ?? false)) + .ThenBy(x => !string.IsNullOrEmpty(query) && (x.Headword?.ContainsDiacriticMatch(query!) ?? false)) .ThenByDescending(x => x.Headword?.Length ?? 0) .ThenByDescending(x => x.Headword) + .ThenByDescending(x => x.Entry.PrimaryMorphType?.SecondaryOrder ?? stemSecondaryOrder) + // .ThenByDescending(x => x.Entry.HomographNumber) .ThenByDescending(x => x.Entry.Id.Guid) .Select(x => x.Entry); } diff --git a/backend/FwLite/FwDataMiniLcmBridge/Api/UpdateProxy/UpdateMorphTypeProxy.cs b/backend/FwLite/FwDataMiniLcmBridge/Api/UpdateProxy/UpdateMorphTypeProxy.cs index 833eb1ffd9..052a07567c 100644 --- a/backend/FwLite/FwDataMiniLcmBridge/Api/UpdateProxy/UpdateMorphTypeProxy.cs +++ b/backend/FwLite/FwDataMiniLcmBridge/Api/UpdateProxy/UpdateMorphTypeProxy.cs @@ -1,3 +1,4 @@ +using System.Diagnostics.CodeAnalysis; using MiniLcm.Models; using SIL.LCModel; @@ -8,11 +9,13 @@ public class UpdateMorphTypeProxy : MorphType private readonly IMoMorphType _lcmMorphType; private readonly FwDataMiniLcmApi _lexboxLcmApi; + [SetsRequiredMembers] public UpdateMorphTypeProxy(IMoMorphType lcmMorphType, FwDataMiniLcmApi lexboxLcmApi) { _lcmMorphType = lcmMorphType; Id = lcmMorphType.Guid; _lexboxLcmApi = lexboxLcmApi; + Kind = LcmHelpers.FromLcmMorphType(lcmMorphType); } public override MultiString Name diff --git a/backend/FwLite/FwLiteProjectSync.Tests/sena-3-live_snapshot.verified.txt b/backend/FwLite/FwLiteProjectSync.Tests/sena-3-live_snapshot.verified.txt index 7f39efc1d2..af1b0b8168 100644 --- a/backend/FwLite/FwLiteProjectSync.Tests/sena-3-live_snapshot.verified.txt +++ b/backend/FwLite/FwLiteProjectSync.Tests/sena-3-live_snapshot.verified.txt @@ -1,5 +1,111 @@ { "Entries": [ + { + "Id": "cd045907-e8fc-46a3-8f8d-f71bd956275f", + "DeletedAt": null, + "LexemeForm": { + "seh": "a" + }, + "CitationForm": {}, + "LiteralMeaning": {}, + "MorphType": "Root", + "Senses": [ + { + "Id": "c33c51d4-f405-4d34-99c3-5eb36881a0d1", + "Order": 1, + "DeletedAt": null, + "EntryId": "cd045907-e8fc-46a3-8f8d-f71bd956275f", + "Definition": { + "en": { + "Spans": [ + { + "Text": "of", + "Ws": "en" + } + ] + }, + "pt": { + "Spans": [ + { + "Text": "de", + "Ws": "pt" + } + ] + } + }, + "Gloss": { + "en": "ASSOC", + "pt": "ASSOC" + }, + "PartOfSpeech": { + "Id": "8d0461bd-2b2e-4d65-9f17-0ab5b99d0736", + "Name": { + "en": "Associative", + "pt": "Associativo" + }, + "DeletedAt": null, + "Predefined": false + }, + "PartOfSpeechId": "8d0461bd-2b2e-4d65-9f17-0ab5b99d0736", + "SemanticDomains": [], + "ExampleSentences": [ + { + "Id": "6f7fe99a-de48-4761-b58f-688bfec15073", + "Order": 1, + "Sentence": { + "seh": { + "Spans": [ + { + "Text": "mwana wa Fa\u0301tima", + "Ws": "seh" + } + ] + } + }, + "Translations": [ + { + "Id": "2e9fb296-e4a6-4959-ad95-d18d19474a3d", + "Text": { + "en": { + "Spans": [ + { + "Text": "child of Fatima", + "Ws": "en" + } + ] + }, + "pt": { + "Spans": [ + { + "Text": "crianc\u0327a de Fa\u0301tima", + "Ws": "pt" + } + ] + } + } + } + ], + "Reference": null, + "SenseId": "c33c51d4-f405-4d34-99c3-5eb36881a0d1", + "DeletedAt": null + } + ] + } + ], + "Note": {}, + "Components": [], + "ComplexForms": [], + "ComplexFormTypes": [], + "PublishIn": [ + { + "Id": "70c0a758-5901-4884-b992-94ca31087607", + "DeletedAt": null, + "Name": { + "en": "Main Dictionary" + } + } + ] + }, { "Id": "34779c06-5a73-4fe9-8325-b110b23f9293", "DeletedAt": null, @@ -363,35 +469,35 @@ ] }, { - "Id": "af56599e-3eb6-4b61-984d-f69267a0b4a2", + "Id": "d7029d27-45bc-4e53-968e-3c754805eb0a", "DeletedAt": null, "LexemeForm": { "seh": "a" }, "CitationForm": {}, "LiteralMeaning": {}, - "MorphType": "Suffix", + "MorphType": "Prefix", "Senses": [ { - "Id": "6b9b59b9-629f-4c8d-90c8-878a40a35ee8", + "Id": "82d967cc-94e1-47ee-ab71-303394ab0470", "Order": 1, "DeletedAt": null, - "EntryId": "af56599e-3eb6-4b61-984d-f69267a0b4a2", + "EntryId": "d7029d27-45bc-4e53-968e-3c754805eb0a", "Definition": {}, "Gloss": { - "en": "-er", - "pt": "nominalizador" + "en": "3S\u002B1", + "pt": "3S\u002B1" }, "PartOfSpeech": { - "Id": "c99beb3a-995d-4156-a66c-9b7d0860c332", + "Id": "86ff66f6-0774-407a-a0dc-3eeaf873daf7", "Name": { - "en": "Nominalizer", - "pt": "Nominalizador" + "en": "Verb", + "pt": "Verbo" }, "DeletedAt": null, - "Predefined": false + "Predefined": true }, - "PartOfSpeechId": "c99beb3a-995d-4156-a66c-9b7d0860c332", + "PartOfSpeechId": "86ff66f6-0774-407a-a0dc-3eeaf873daf7", "SemanticDomains": [], "ExampleSentences": [] } @@ -411,25 +517,73 @@ ] }, { - "Id": "cd045907-e8fc-46a3-8f8d-f71bd956275f", + "Id": "e7727ed9-55da-4c34-bdec-f34b7a07019e", "DeletedAt": null, "LexemeForm": { "seh": "a" }, "CitationForm": {}, "LiteralMeaning": {}, - "MorphType": "Root", + "MorphType": "Prefix", "Senses": [ { - "Id": "c33c51d4-f405-4d34-99c3-5eb36881a0d1", + "Id": "1af27ad8-7b07-462b-90fe-115b5bd63ecd", "Order": 1, "DeletedAt": null, - "EntryId": "cd045907-e8fc-46a3-8f8d-f71bd956275f", + "EntryId": "e7727ed9-55da-4c34-bdec-f34b7a07019e", + "Definition": {}, + "Gloss": { + "en": "PAST", + "pt": "PASSADO" + }, + "PartOfSpeech": { + "Id": "86ff66f6-0774-407a-a0dc-3eeaf873daf7", + "Name": { + "en": "Verb", + "pt": "Verbo" + }, + "DeletedAt": null, + "Predefined": true + }, + "PartOfSpeechId": "86ff66f6-0774-407a-a0dc-3eeaf873daf7", + "SemanticDomains": [], + "ExampleSentences": [] + } + ], + "Note": {}, + "Components": [], + "ComplexForms": [], + "ComplexFormTypes": [], + "PublishIn": [ + { + "Id": "70c0a758-5901-4884-b992-94ca31087607", + "DeletedAt": null, + "Name": { + "en": "Main Dictionary" + } + } + ] + }, + { + "Id": "f91cf0e0-b3c2-4478-9f1f-becaecf307e5", + "DeletedAt": null, + "LexemeForm": { + "seh": "a" + }, + "CitationForm": {}, + "LiteralMeaning": {}, + "MorphType": "Prefix", + "Senses": [ + { + "Id": "9e9ad5c2-26f8-4ed1-9803-2af452088701", + "Order": 1, + "DeletedAt": null, + "EntryId": "f91cf0e0-b3c2-4478-9f1f-becaecf307e5", "Definition": { "en": { "Spans": [ { - "Text": "of", + "Text": "associative prefix", "Ws": "en" } ] @@ -437,66 +591,40 @@ "pt": { "Spans": [ { - "Text": "de", + "Text": "prefixo associativo", "Ws": "pt" } ] } }, "Gloss": { - "en": "ASSOC", - "pt": "ASSOC" + "en": "assocpx", + "pt": "assocpx" }, "PartOfSpeech": { - "Id": "8d0461bd-2b2e-4d65-9f17-0ab5b99d0736", + "Id": "61b871bd-293d-4144-9c36-4ffe3d3d078f", "Name": { - "en": "Associative", - "pt": "Associativo" + "en": "Possessive", + "pt": "Possessivo" }, "DeletedAt": null, "Predefined": false }, - "PartOfSpeechId": "8d0461bd-2b2e-4d65-9f17-0ab5b99d0736", + "PartOfSpeechId": "61b871bd-293d-4144-9c36-4ffe3d3d078f", "SemanticDomains": [], "ExampleSentences": [ { - "Id": "6f7fe99a-de48-4761-b58f-688bfec15073", + "Id": "fc02be9f-32fb-470b-b305-1395b664a1fb", "Order": 1, - "Sentence": { - "seh": { - "Spans": [ - { - "Text": "mwana wa Fa\u0301tima", - "Ws": "seh" - } - ] - } - }, + "Sentence": {}, "Translations": [ { - "Id": "2e9fb296-e4a6-4959-ad95-d18d19474a3d", - "Text": { - "en": { - "Spans": [ - { - "Text": "child of Fatima", - "Ws": "en" - } - ] - }, - "pt": { - "Spans": [ - { - "Text": "crianc\u0327a de Fa\u0301tima", - "Ws": "pt" - } - ] - } - } + "Id": "771f67f2-e50a-4bc0-8b9a-94bb0a4a4938", + "Text": {} } ], "Reference": null, - "SenseId": "c33c51d4-f405-4d34-99c3-5eb36881a0d1", + "SenseId": "9e9ad5c2-26f8-4ed1-9803-2af452088701", "DeletedAt": null } ] @@ -517,35 +645,35 @@ ] }, { - "Id": "d7029d27-45bc-4e53-968e-3c754805eb0a", + "Id": "af56599e-3eb6-4b61-984d-f69267a0b4a2", "DeletedAt": null, "LexemeForm": { "seh": "a" }, "CitationForm": {}, "LiteralMeaning": {}, - "MorphType": "Prefix", + "MorphType": "Suffix", "Senses": [ { - "Id": "82d967cc-94e1-47ee-ab71-303394ab0470", + "Id": "6b9b59b9-629f-4c8d-90c8-878a40a35ee8", "Order": 1, "DeletedAt": null, - "EntryId": "d7029d27-45bc-4e53-968e-3c754805eb0a", + "EntryId": "af56599e-3eb6-4b61-984d-f69267a0b4a2", "Definition": {}, "Gloss": { - "en": "3S\u002B1", - "pt": "3S\u002B1" + "en": "-er", + "pt": "nominalizador" }, "PartOfSpeech": { - "Id": "86ff66f6-0774-407a-a0dc-3eeaf873daf7", + "Id": "c99beb3a-995d-4156-a66c-9b7d0860c332", "Name": { - "en": "Verb", - "pt": "Verbo" + "en": "Nominalizer", + "pt": "Nominalizador" }, "DeletedAt": null, - "Predefined": true + "Predefined": false }, - "PartOfSpeechId": "86ff66f6-0774-407a-a0dc-3eeaf873daf7", + "PartOfSpeechId": "c99beb3a-995d-4156-a66c-9b7d0860c332", "SemanticDomains": [], "ExampleSentences": [] } @@ -612,134 +740,6 @@ } ] }, - { - "Id": "e7727ed9-55da-4c34-bdec-f34b7a07019e", - "DeletedAt": null, - "LexemeForm": { - "seh": "a" - }, - "CitationForm": {}, - "LiteralMeaning": {}, - "MorphType": "Prefix", - "Senses": [ - { - "Id": "1af27ad8-7b07-462b-90fe-115b5bd63ecd", - "Order": 1, - "DeletedAt": null, - "EntryId": "e7727ed9-55da-4c34-bdec-f34b7a07019e", - "Definition": {}, - "Gloss": { - "en": "PAST", - "pt": "PASSADO" - }, - "PartOfSpeech": { - "Id": "86ff66f6-0774-407a-a0dc-3eeaf873daf7", - "Name": { - "en": "Verb", - "pt": "Verbo" - }, - "DeletedAt": null, - "Predefined": true - }, - "PartOfSpeechId": "86ff66f6-0774-407a-a0dc-3eeaf873daf7", - "SemanticDomains": [], - "ExampleSentences": [] - } - ], - "Note": {}, - "Components": [], - "ComplexForms": [], - "ComplexFormTypes": [], - "PublishIn": [ - { - "Id": "70c0a758-5901-4884-b992-94ca31087607", - "DeletedAt": null, - "Name": { - "en": "Main Dictionary" - } - } - ] - }, - { - "Id": "f91cf0e0-b3c2-4478-9f1f-becaecf307e5", - "DeletedAt": null, - "LexemeForm": { - "seh": "a" - }, - "CitationForm": {}, - "LiteralMeaning": {}, - "MorphType": "Prefix", - "Senses": [ - { - "Id": "9e9ad5c2-26f8-4ed1-9803-2af452088701", - "Order": 1, - "DeletedAt": null, - "EntryId": "f91cf0e0-b3c2-4478-9f1f-becaecf307e5", - "Definition": { - "en": { - "Spans": [ - { - "Text": "associative prefix", - "Ws": "en" - } - ] - }, - "pt": { - "Spans": [ - { - "Text": "prefixo associativo", - "Ws": "pt" - } - ] - } - }, - "Gloss": { - "en": "assocpx", - "pt": "assocpx" - }, - "PartOfSpeech": { - "Id": "61b871bd-293d-4144-9c36-4ffe3d3d078f", - "Name": { - "en": "Possessive", - "pt": "Possessivo" - }, - "DeletedAt": null, - "Predefined": false - }, - "PartOfSpeechId": "61b871bd-293d-4144-9c36-4ffe3d3d078f", - "SemanticDomains": [], - "ExampleSentences": [ - { - "Id": "fc02be9f-32fb-470b-b305-1395b664a1fb", - "Order": 1, - "Sentence": {}, - "Translations": [ - { - "Id": "771f67f2-e50a-4bc0-8b9a-94bb0a4a4938", - "Text": {} - } - ], - "Reference": null, - "SenseId": "9e9ad5c2-26f8-4ed1-9803-2af452088701", - "DeletedAt": null - } - ] - } - ], - "Note": {}, - "Components": [], - "ComplexForms": [], - "ComplexFormTypes": [], - "PublishIn": [ - { - "Id": "70c0a758-5901-4884-b992-94ca31087607", - "DeletedAt": null, - "Name": { - "en": "Main Dictionary" - } - } - ] - }, { "Id": "a15b8ae6-207c-4999-b1e0-f95a348d1bdc", "DeletedAt": null, @@ -19390,151 +19390,6 @@ } ] }, - { - "Id": "241ca9b4-9077-4050-b8c9-6e1759a989d3", - "DeletedAt": null, - "LexemeForm": { - "seh": "dza" - }, - "CitationForm": {}, - "LiteralMeaning": {}, - "MorphType": "Prefix", - "Senses": [ - { - "Id": "0156d415-bb52-4d42-8457-ddd27562d830", - "Order": 1, - "DeletedAt": null, - "EntryId": "241ca9b4-9077-4050-b8c9-6e1759a989d3", - "Definition": { - "en": { - "Spans": [ - { - "Text": "culmination", - "Ws": "en" - } - ] - } - }, - "Gloss": { - "en": "CUL", - "pt": "CUL" - }, - "PartOfSpeech": { - "Id": "86ff66f6-0774-407a-a0dc-3eeaf873daf7", - "Name": { - "en": "Verb", - "pt": "Verbo" - }, - "DeletedAt": null, - "Predefined": true - }, - "PartOfSpeechId": "86ff66f6-0774-407a-a0dc-3eeaf873daf7", - "SemanticDomains": [], - "ExampleSentences": [] - } - ], - "Note": {}, - "Components": [], - "ComplexForms": [], - "ComplexFormTypes": [], - "PublishIn": [ - { - "Id": "70c0a758-5901-4884-b992-94ca31087607", - "DeletedAt": null, - "Name": { - "en": "Main Dictionary" - } - } - ] - }, - { - "Id": "abf2aea2-2eb0-4e73-82df-d2102c1e974b", - "DeletedAt": null, - "LexemeForm": { - "seh": "dza" - }, - "CitationForm": {}, - "LiteralMeaning": {}, - "MorphType": "Prefix", - "Senses": [ - { - "Id": "7aeea56a-5e2a-49b3-a7be-6841608f8364", - "Order": 1, - "DeletedAt": null, - "EntryId": "abf2aea2-2eb0-4e73-82df-d2102c1e974b", - "Definition": { - "en": { - "Spans": [ - { - "Text": "in order to, distal/purpose infinitive", - "Ws": "en" - } - ] - }, - "pt": { - "Spans": [ - { - "Text": "para", - "Ws": "pt" - } - ] - } - }, - "Gloss": { - "en": "PURP", - "pt": "PURP" - }, - "PartOfSpeech": { - "Id": "86ff66f6-0774-407a-a0dc-3eeaf873daf7", - "Name": { - "en": "Verb", - "pt": "Verbo" - }, - "DeletedAt": null, - "Predefined": true - }, - "PartOfSpeechId": "86ff66f6-0774-407a-a0dc-3eeaf873daf7", - "SemanticDomains": [], - "ExampleSentences": [] - }, - { - "Id": "c6af0fa0-431c-4b33-a1d9-39b590222ee2", - "Order": 2, - "DeletedAt": null, - "EntryId": "abf2aea2-2eb0-4e73-82df-d2102c1e974b", - "Definition": {}, - "Gloss": { - "en": "PURP", - "pt": "PURP" - }, - "PartOfSpeech": { - "Id": "86ff66f6-0774-407a-a0dc-3eeaf873daf7", - "Name": { - "en": "Verb", - "pt": "Verbo" - }, - "DeletedAt": null, - "Predefined": true - }, - "PartOfSpeechId": "86ff66f6-0774-407a-a0dc-3eeaf873daf7", - "SemanticDomains": [], - "ExampleSentences": [] - } - ], - "Note": {}, - "Components": [], - "ComplexForms": [], - "ComplexFormTypes": [], - "PublishIn": [ - { - "Id": "70c0a758-5901-4884-b992-94ca31087607", - "DeletedAt": null, - "Name": { - "en": "Main Dictionary" - } - } - ] - }, { "Id": "af8c4882-d0f9-44ae-9bc8-6f0073e2b859", "DeletedAt": null, @@ -19660,6 +19515,151 @@ } ] }, + { + "Id": "241ca9b4-9077-4050-b8c9-6e1759a989d3", + "DeletedAt": null, + "LexemeForm": { + "seh": "dza" + }, + "CitationForm": {}, + "LiteralMeaning": {}, + "MorphType": "Prefix", + "Senses": [ + { + "Id": "0156d415-bb52-4d42-8457-ddd27562d830", + "Order": 1, + "DeletedAt": null, + "EntryId": "241ca9b4-9077-4050-b8c9-6e1759a989d3", + "Definition": { + "en": { + "Spans": [ + { + "Text": "culmination", + "Ws": "en" + } + ] + } + }, + "Gloss": { + "en": "CUL", + "pt": "CUL" + }, + "PartOfSpeech": { + "Id": "86ff66f6-0774-407a-a0dc-3eeaf873daf7", + "Name": { + "en": "Verb", + "pt": "Verbo" + }, + "DeletedAt": null, + "Predefined": true + }, + "PartOfSpeechId": "86ff66f6-0774-407a-a0dc-3eeaf873daf7", + "SemanticDomains": [], + "ExampleSentences": [] + } + ], + "Note": {}, + "Components": [], + "ComplexForms": [], + "ComplexFormTypes": [], + "PublishIn": [ + { + "Id": "70c0a758-5901-4884-b992-94ca31087607", + "DeletedAt": null, + "Name": { + "en": "Main Dictionary" + } + } + ] + }, + { + "Id": "abf2aea2-2eb0-4e73-82df-d2102c1e974b", + "DeletedAt": null, + "LexemeForm": { + "seh": "dza" + }, + "CitationForm": {}, + "LiteralMeaning": {}, + "MorphType": "Prefix", + "Senses": [ + { + "Id": "7aeea56a-5e2a-49b3-a7be-6841608f8364", + "Order": 1, + "DeletedAt": null, + "EntryId": "abf2aea2-2eb0-4e73-82df-d2102c1e974b", + "Definition": { + "en": { + "Spans": [ + { + "Text": "in order to, distal/purpose infinitive", + "Ws": "en" + } + ] + }, + "pt": { + "Spans": [ + { + "Text": "para", + "Ws": "pt" + } + ] + } + }, + "Gloss": { + "en": "PURP", + "pt": "PURP" + }, + "PartOfSpeech": { + "Id": "86ff66f6-0774-407a-a0dc-3eeaf873daf7", + "Name": { + "en": "Verb", + "pt": "Verbo" + }, + "DeletedAt": null, + "Predefined": true + }, + "PartOfSpeechId": "86ff66f6-0774-407a-a0dc-3eeaf873daf7", + "SemanticDomains": [], + "ExampleSentences": [] + }, + { + "Id": "c6af0fa0-431c-4b33-a1d9-39b590222ee2", + "Order": 2, + "DeletedAt": null, + "EntryId": "abf2aea2-2eb0-4e73-82df-d2102c1e974b", + "Definition": {}, + "Gloss": { + "en": "PURP", + "pt": "PURP" + }, + "PartOfSpeech": { + "Id": "86ff66f6-0774-407a-a0dc-3eeaf873daf7", + "Name": { + "en": "Verb", + "pt": "Verbo" + }, + "DeletedAt": null, + "Predefined": true + }, + "PartOfSpeechId": "86ff66f6-0774-407a-a0dc-3eeaf873daf7", + "SemanticDomains": [], + "ExampleSentences": [] + } + ], + "Note": {}, + "Components": [], + "ComplexForms": [], + "ComplexFormTypes": [], + "PublishIn": [ + { + "Id": "70c0a758-5901-4884-b992-94ca31087607", + "DeletedAt": null, + "Name": { + "en": "Main Dictionary" + } + } + ] + }, { "Id": "2cb6087a-4ffb-4544-9381-b49733d3bd64", "DeletedAt": null, @@ -27603,6 +27603,123 @@ } ] }, + { + "Id": "41eaf8e3-fad8-4957-b901-21870f508091", + "DeletedAt": null, + "LexemeForm": { + "seh": "i" + }, + "CitationForm": {}, + "LiteralMeaning": {}, + "MorphType": "Prefix", + "Senses": [ + { + "Id": "9800597f-0dab-48f8-93cc-b1a2be904e29", + "Order": 1, + "DeletedAt": null, + "EntryId": "41eaf8e3-fad8-4957-b901-21870f508091", + "Definition": {}, + "Gloss": { + "en": "4\u002B5\u002B9", + "pt": "4\u002B5\u002B9" + }, + "PartOfSpeech": { + "Id": "86ff66f6-0774-407a-a0dc-3eeaf873daf7", + "Name": { + "en": "Verb", + "pt": "Verbo" + }, + "DeletedAt": null, + "Predefined": true + }, + "PartOfSpeechId": "86ff66f6-0774-407a-a0dc-3eeaf873daf7", + "SemanticDomains": [], + "ExampleSentences": [] + }, + { + "Id": "a9da180c-b617-4897-bbc3-9483e3ec9fae", + "Order": 2, + "DeletedAt": null, + "EntryId": "41eaf8e3-fad8-4957-b901-21870f508091", + "Definition": {}, + "Gloss": { + "en": "4\u002B5\u002B9", + "pt": "4\u002B5\u002B9" + }, + "PartOfSpeech": { + "Id": "8d0461bd-2b2e-4d65-9f17-0ab5b99d0736", + "Name": { + "en": "Associative", + "pt": "Associativo" + }, + "DeletedAt": null, + "Predefined": false + }, + "PartOfSpeechId": "8d0461bd-2b2e-4d65-9f17-0ab5b99d0736", + "SemanticDomains": [], + "ExampleSentences": [] + }, + { + "Id": "b7dd12e2-4eaa-40a1-a07e-8239b886f77f", + "Order": 3, + "DeletedAt": null, + "EntryId": "41eaf8e3-fad8-4957-b901-21870f508091", + "Definition": {}, + "Gloss": { + "en": "4\u002B5\u002B9", + "pt": "4\u002B5\u002B9" + }, + "PartOfSpeech": { + "Id": "61b871bd-293d-4144-9c36-4ffe3d3d078f", + "Name": { + "en": "Possessive", + "pt": "Possessivo" + }, + "DeletedAt": null, + "Predefined": false + }, + "PartOfSpeechId": "61b871bd-293d-4144-9c36-4ffe3d3d078f", + "SemanticDomains": [], + "ExampleSentences": [] + }, + { + "Id": "ad95bb10-5844-4233-9481-227dc410fc16", + "Order": 4, + "DeletedAt": null, + "EntryId": "41eaf8e3-fad8-4957-b901-21870f508091", + "Definition": {}, + "Gloss": { + "en": "4\u002B5\u002B9", + "pt": "4\u002B5\u002B9" + }, + "PartOfSpeech": { + "Id": "b460265b-9132-4e52-bb51-64b5a2aa7f69", + "Name": { + "en": "Adjective", + "pt": "Adjectivo" + }, + "DeletedAt": null, + "Predefined": false + }, + "PartOfSpeechId": "b460265b-9132-4e52-bb51-64b5a2aa7f69", + "SemanticDomains": [], + "ExampleSentences": [] + } + ], + "Note": {}, + "Components": [], + "ComplexForms": [], + "ComplexFormTypes": [], + "PublishIn": [ + { + "Id": "70c0a758-5901-4884-b992-94ca31087607", + "DeletedAt": null, + "Name": { + "en": "Main Dictionary" + } + } + ] + }, { "Id": "15dddc00-6015-4412-aa3b-644db854c89e", "DeletedAt": null, @@ -27701,123 +27818,6 @@ } ] }, - { - "Id": "41eaf8e3-fad8-4957-b901-21870f508091", - "DeletedAt": null, - "LexemeForm": { - "seh": "i" - }, - "CitationForm": {}, - "LiteralMeaning": {}, - "MorphType": "Prefix", - "Senses": [ - { - "Id": "9800597f-0dab-48f8-93cc-b1a2be904e29", - "Order": 1, - "DeletedAt": null, - "EntryId": "41eaf8e3-fad8-4957-b901-21870f508091", - "Definition": {}, - "Gloss": { - "en": "4\u002B5\u002B9", - "pt": "4\u002B5\u002B9" - }, - "PartOfSpeech": { - "Id": "86ff66f6-0774-407a-a0dc-3eeaf873daf7", - "Name": { - "en": "Verb", - "pt": "Verbo" - }, - "DeletedAt": null, - "Predefined": true - }, - "PartOfSpeechId": "86ff66f6-0774-407a-a0dc-3eeaf873daf7", - "SemanticDomains": [], - "ExampleSentences": [] - }, - { - "Id": "a9da180c-b617-4897-bbc3-9483e3ec9fae", - "Order": 2, - "DeletedAt": null, - "EntryId": "41eaf8e3-fad8-4957-b901-21870f508091", - "Definition": {}, - "Gloss": { - "en": "4\u002B5\u002B9", - "pt": "4\u002B5\u002B9" - }, - "PartOfSpeech": { - "Id": "8d0461bd-2b2e-4d65-9f17-0ab5b99d0736", - "Name": { - "en": "Associative", - "pt": "Associativo" - }, - "DeletedAt": null, - "Predefined": false - }, - "PartOfSpeechId": "8d0461bd-2b2e-4d65-9f17-0ab5b99d0736", - "SemanticDomains": [], - "ExampleSentences": [] - }, - { - "Id": "b7dd12e2-4eaa-40a1-a07e-8239b886f77f", - "Order": 3, - "DeletedAt": null, - "EntryId": "41eaf8e3-fad8-4957-b901-21870f508091", - "Definition": {}, - "Gloss": { - "en": "4\u002B5\u002B9", - "pt": "4\u002B5\u002B9" - }, - "PartOfSpeech": { - "Id": "61b871bd-293d-4144-9c36-4ffe3d3d078f", - "Name": { - "en": "Possessive", - "pt": "Possessivo" - }, - "DeletedAt": null, - "Predefined": false - }, - "PartOfSpeechId": "61b871bd-293d-4144-9c36-4ffe3d3d078f", - "SemanticDomains": [], - "ExampleSentences": [] - }, - { - "Id": "ad95bb10-5844-4233-9481-227dc410fc16", - "Order": 4, - "DeletedAt": null, - "EntryId": "41eaf8e3-fad8-4957-b901-21870f508091", - "Definition": {}, - "Gloss": { - "en": "4\u002B5\u002B9", - "pt": "4\u002B5\u002B9" - }, - "PartOfSpeech": { - "Id": "b460265b-9132-4e52-bb51-64b5a2aa7f69", - "Name": { - "en": "Adjective", - "pt": "Adjectivo" - }, - "DeletedAt": null, - "Predefined": false - }, - "PartOfSpeechId": "b460265b-9132-4e52-bb51-64b5a2aa7f69", - "SemanticDomains": [], - "ExampleSentences": [] - } - ], - "Note": {}, - "Components": [], - "ComplexForms": [], - "ComplexFormTypes": [], - "PublishIn": [ - { - "Id": "70c0a758-5901-4884-b992-94ca31087607", - "DeletedAt": null, - "Name": { - "en": "Main Dictionary" - } - } - ] - }, { "Id": "86edf77a-16db-4ac5-9c24-a34bd334c712", "DeletedAt": null, @@ -35674,67 +35674,6 @@ } ] }, - { - "Id": "942fb913-e648-473c-a6ee-a592dd8996db", - "DeletedAt": null, - "LexemeForm": { - "seh": "-ko" - }, - "CitationForm": { - "seh": "ko" - }, - "LiteralMeaning": {}, - "MorphType": "Suffix", - "Senses": [ - { - "Id": "8d3e8de3-e531-4c01-98ac-6143f6bf3559", - "Order": 1, - "DeletedAt": null, - "EntryId": "942fb913-e648-473c-a6ee-a592dd8996db", - "Definition": {}, - "Gloss": { - "en": "LOC there", - "pt": "ali" - }, - "PartOfSpeech": null, - "PartOfSpeechId": null, - "SemanticDomains": [], - "ExampleSentences": [ - { - "Id": "403597a6-4605-423d-a9da-2c079427acfd", - "Order": 1, - "Sentence": { - "seh": { - "Spans": [ - { - "Text": "kauka-ko eko noko kubazari-ko", - "Ws": "seh" - } - ] - } - }, - "Translations": [], - "Reference": null, - "SenseId": "8d3e8de3-e531-4c01-98ac-6143f6bf3559", - "DeletedAt": null - } - ] - } - ], - "Note": {}, - "Components": [], - "ComplexForms": [], - "ComplexFormTypes": [], - "PublishIn": [ - { - "Id": "70c0a758-5901-4884-b992-94ca31087607", - "DeletedAt": null, - "Name": { - "en": "Main Dictionary" - } - } - ] - }, { "Id": "ef57f28b-7741-4249-9572-63de6aa524d7", "DeletedAt": null, @@ -35800,6 +35739,67 @@ } ] }, + { + "Id": "942fb913-e648-473c-a6ee-a592dd8996db", + "DeletedAt": null, + "LexemeForm": { + "seh": "-ko" + }, + "CitationForm": { + "seh": "ko" + }, + "LiteralMeaning": {}, + "MorphType": "Suffix", + "Senses": [ + { + "Id": "8d3e8de3-e531-4c01-98ac-6143f6bf3559", + "Order": 1, + "DeletedAt": null, + "EntryId": "942fb913-e648-473c-a6ee-a592dd8996db", + "Definition": {}, + "Gloss": { + "en": "LOC there", + "pt": "ali" + }, + "PartOfSpeech": null, + "PartOfSpeechId": null, + "SemanticDomains": [], + "ExampleSentences": [ + { + "Id": "403597a6-4605-423d-a9da-2c079427acfd", + "Order": 1, + "Sentence": { + "seh": { + "Spans": [ + { + "Text": "kauka-ko eko noko kubazari-ko", + "Ws": "seh" + } + ] + } + }, + "Translations": [], + "Reference": null, + "SenseId": "8d3e8de3-e531-4c01-98ac-6143f6bf3559", + "DeletedAt": null + } + ] + } + ], + "Note": {}, + "Components": [], + "ComplexForms": [], + "ComplexFormTypes": [], + "PublishIn": [ + { + "Id": "70c0a758-5901-4884-b992-94ca31087607", + "DeletedAt": null, + "Name": { + "en": "Main Dictionary" + } + } + ] + }, { "Id": "fa12a4b4-201a-43c6-bcd1-8007cabebe1a", "DeletedAt": null, @@ -36857,6 +36857,64 @@ } ] }, + { + "Id": "76be65ab-4221-41d9-a68f-db80ef100c96", + "DeletedAt": null, + "LexemeForm": { + "seh": "ku" + }, + "CitationForm": {}, + "LiteralMeaning": {}, + "MorphType": "Stem", + "Senses": [ + { + "Id": "d59897d5-86fc-4e24-9d9a-f1ef7a104c77", + "Order": 1, + "DeletedAt": null, + "EntryId": "76be65ab-4221-41d9-a68f-db80ef100c96", + "Definition": {}, + "Gloss": { + "en": "to", + "pt": "a\u0301" + }, + "PartOfSpeech": { + "Id": "24f4134f-0530-449c-b809-8a633ced440d", + "Name": { + "en": "Preposition", + "pt": "Preposic\u0327a\u0303o" + }, + "DeletedAt": null, + "Predefined": false + }, + "PartOfSpeechId": "24f4134f-0530-449c-b809-8a633ced440d", + "SemanticDomains": [], + "ExampleSentences": [ + { + "Id": "7336f02a-0896-4169-9174-e2f11fb4f00d", + "Order": 1, + "Sentence": {}, + "Translations": [], + "Reference": null, + "SenseId": "d59897d5-86fc-4e24-9d9a-f1ef7a104c77", + "DeletedAt": null + } + ] + } + ], + "Note": {}, + "Components": [], + "ComplexForms": [], + "ComplexFormTypes": [], + "PublishIn": [ + { + "Id": "70c0a758-5901-4884-b992-94ca31087607", + "DeletedAt": null, + "Name": { + "en": "Main Dictionary" + } + } + ] + }, { "Id": "25104932-acb5-4b6c-b1da-2e3289c4b6ed", "DeletedAt": null, @@ -37246,64 +37304,6 @@ } ] }, - { - "Id": "76be65ab-4221-41d9-a68f-db80ef100c96", - "DeletedAt": null, - "LexemeForm": { - "seh": "ku" - }, - "CitationForm": {}, - "LiteralMeaning": {}, - "MorphType": "Stem", - "Senses": [ - { - "Id": "d59897d5-86fc-4e24-9d9a-f1ef7a104c77", - "Order": 1, - "DeletedAt": null, - "EntryId": "76be65ab-4221-41d9-a68f-db80ef100c96", - "Definition": {}, - "Gloss": { - "en": "to", - "pt": "a\u0301" - }, - "PartOfSpeech": { - "Id": "24f4134f-0530-449c-b809-8a633ced440d", - "Name": { - "en": "Preposition", - "pt": "Preposic\u0327a\u0303o" - }, - "DeletedAt": null, - "Predefined": false - }, - "PartOfSpeechId": "24f4134f-0530-449c-b809-8a633ced440d", - "SemanticDomains": [], - "ExampleSentences": [ - { - "Id": "7336f02a-0896-4169-9174-e2f11fb4f00d", - "Order": 1, - "Sentence": {}, - "Translations": [], - "Reference": null, - "SenseId": "d59897d5-86fc-4e24-9d9a-f1ef7a104c77", - "DeletedAt": null - } - ] - } - ], - "Note": {}, - "Components": [], - "ComplexForms": [], - "ComplexFormTypes": [], - "PublishIn": [ - { - "Id": "70c0a758-5901-4884-b992-94ca31087607", - "DeletedAt": null, - "Name": { - "en": "Main Dictionary" - } - } - ] - }, { "Id": "7af9da8b-bf27-4006-bd4c-0bbc5205633f", "DeletedAt": null, @@ -56273,6 +56273,81 @@ } ] }, + { + "Id": "d2a90aca-1c65-4a90-955a-4ffe3ec34185", + "DeletedAt": null, + "LexemeForm": { + "seh": "mu" + }, + "CitationForm": {}, + "LiteralMeaning": {}, + "MorphType": "Stem", + "Senses": [ + { + "Id": "712de4ef-13ce-4516-a450-4f4e3aa1c027", + "Order": 1, + "DeletedAt": null, + "EntryId": "d2a90aca-1c65-4a90-955a-4ffe3ec34185", + "Definition": { + "en": { + "Spans": [ + { + "Text": "in, into", + "Ws": "en" + } + ] + }, + "pt": { + "Spans": [ + { + "Text": "em", + "Ws": "pt" + } + ] + } + }, + "Gloss": { + "en": "in", + "pt": "em" + }, + "PartOfSpeech": { + "Id": "24f4134f-0530-449c-b809-8a633ced440d", + "Name": { + "en": "Preposition", + "pt": "Preposic\u0327a\u0303o" + }, + "DeletedAt": null, + "Predefined": false + }, + "PartOfSpeechId": "24f4134f-0530-449c-b809-8a633ced440d", + "SemanticDomains": [], + "ExampleSentences": [ + { + "Id": "f4864315-6769-4836-bb01-ee069d68dab4", + "Order": 1, + "Sentence": {}, + "Translations": [], + "Reference": null, + "SenseId": "712de4ef-13ce-4516-a450-4f4e3aa1c027", + "DeletedAt": null + } + ] + } + ], + "Note": {}, + "Components": [], + "ComplexForms": [], + "ComplexFormTypes": [], + "PublishIn": [ + { + "Id": "70c0a758-5901-4884-b992-94ca31087607", + "DeletedAt": null, + "Name": { + "en": "Main Dictionary" + } + } + ] + }, { "Id": "30caab0e-4a54-44ba-8eeb-35a1b69ac231", "DeletedAt": null, @@ -56733,81 +56808,6 @@ } ] }, - { - "Id": "d2a90aca-1c65-4a90-955a-4ffe3ec34185", - "DeletedAt": null, - "LexemeForm": { - "seh": "mu" - }, - "CitationForm": {}, - "LiteralMeaning": {}, - "MorphType": "Stem", - "Senses": [ - { - "Id": "712de4ef-13ce-4516-a450-4f4e3aa1c027", - "Order": 1, - "DeletedAt": null, - "EntryId": "d2a90aca-1c65-4a90-955a-4ffe3ec34185", - "Definition": { - "en": { - "Spans": [ - { - "Text": "in, into", - "Ws": "en" - } - ] - }, - "pt": { - "Spans": [ - { - "Text": "em", - "Ws": "pt" - } - ] - } - }, - "Gloss": { - "en": "in", - "pt": "em" - }, - "PartOfSpeech": { - "Id": "24f4134f-0530-449c-b809-8a633ced440d", - "Name": { - "en": "Preposition", - "pt": "Preposic\u0327a\u0303o" - }, - "DeletedAt": null, - "Predefined": false - }, - "PartOfSpeechId": "24f4134f-0530-449c-b809-8a633ced440d", - "SemanticDomains": [], - "ExampleSentences": [ - { - "Id": "f4864315-6769-4836-bb01-ee069d68dab4", - "Order": 1, - "Sentence": {}, - "Translations": [], - "Reference": null, - "SenseId": "712de4ef-13ce-4516-a450-4f4e3aa1c027", - "DeletedAt": null - } - ] - } - ], - "Note": {}, - "Components": [], - "ComplexForms": [], - "ComplexFormTypes": [], - "PublishIn": [ - { - "Id": "70c0a758-5901-4884-b992-94ca31087607", - "DeletedAt": null, - "Name": { - "en": "Main Dictionary" - } - } - ] - }, { "Id": "b1beac40-b5b6-4cc6-99cc-85c399fdc9d1", "DeletedAt": null, @@ -61712,71 +61712,6 @@ } ] }, - { - "Id": "681dcfd8-824e-49f3-a0ed-ca68f31ee706", - "DeletedAt": null, - "LexemeForm": { - "seh": "na" - }, - "CitationForm": {}, - "LiteralMeaning": {}, - "MorphType": "Prefix", - "Senses": [ - { - "Id": "81181c3d-cf69-40a1-89d4-d3ab0077528a", - "Order": 1, - "DeletedAt": null, - "EntryId": "681dcfd8-824e-49f3-a0ed-ca68f31ee706", - "Definition": { - "en": { - "Spans": [ - { - "Text": "nonpast", - "Ws": "en" - } - ] - }, - "pt": { - "Spans": [ - { - "Text": "na\u0303o passado", - "Ws": "pt" - } - ] - } - }, - "Gloss": { - "en": "NONPST", - "pt": "NONPST" - }, - "PartOfSpeech": { - "Id": "86ff66f6-0774-407a-a0dc-3eeaf873daf7", - "Name": { - "en": "Verb", - "pt": "Verbo" - }, - "DeletedAt": null, - "Predefined": true - }, - "PartOfSpeechId": "86ff66f6-0774-407a-a0dc-3eeaf873daf7", - "SemanticDomains": [], - "ExampleSentences": [] - } - ], - "Note": {}, - "Components": [], - "ComplexForms": [], - "ComplexFormTypes": [], - "PublishIn": [ - { - "Id": "70c0a758-5901-4884-b992-94ca31087607", - "DeletedAt": null, - "Name": { - "en": "Main Dictionary" - } - } - ] - }, { "Id": "9f64c8e1-7682-44eb-967a-5dc788f9d680", "DeletedAt": null, @@ -62205,6 +62140,71 @@ } ] }, + { + "Id": "681dcfd8-824e-49f3-a0ed-ca68f31ee706", + "DeletedAt": null, + "LexemeForm": { + "seh": "na" + }, + "CitationForm": {}, + "LiteralMeaning": {}, + "MorphType": "Prefix", + "Senses": [ + { + "Id": "81181c3d-cf69-40a1-89d4-d3ab0077528a", + "Order": 1, + "DeletedAt": null, + "EntryId": "681dcfd8-824e-49f3-a0ed-ca68f31ee706", + "Definition": { + "en": { + "Spans": [ + { + "Text": "nonpast", + "Ws": "en" + } + ] + }, + "pt": { + "Spans": [ + { + "Text": "na\u0303o passado", + "Ws": "pt" + } + ] + } + }, + "Gloss": { + "en": "NONPST", + "pt": "NONPST" + }, + "PartOfSpeech": { + "Id": "86ff66f6-0774-407a-a0dc-3eeaf873daf7", + "Name": { + "en": "Verb", + "pt": "Verbo" + }, + "DeletedAt": null, + "Predefined": true + }, + "PartOfSpeechId": "86ff66f6-0774-407a-a0dc-3eeaf873daf7", + "SemanticDomains": [], + "ExampleSentences": [] + } + ], + "Note": {}, + "Components": [], + "ComplexForms": [], + "ComplexFormTypes": [], + "PublishIn": [ + { + "Id": "70c0a758-5901-4884-b992-94ca31087607", + "DeletedAt": null, + "Name": { + "en": "Main Dictionary" + } + } + ] + }, { "Id": "02f5101e-b6e2-47e5-b033-dd7197b5734b", "DeletedAt": null, diff --git a/backend/FwLite/LcmCrdt.Tests/Data/FilteringTests.cs b/backend/FwLite/LcmCrdt.Tests/Data/FilteringTests.cs index 9fcac6de49..3c70a5184e 100644 --- a/backend/FwLite/LcmCrdt.Tests/Data/FilteringTests.cs +++ b/backend/FwLite/LcmCrdt.Tests/Data/FilteringTests.cs @@ -1,11 +1,11 @@ using LcmCrdt.Data; -using MiniLcm.Models; namespace LcmCrdt.Tests.Data; public class FilteringTests { private readonly List _entries; + private readonly IQueryable _morphTypes; public FilteringTests() { @@ -14,6 +14,7 @@ public FilteringTests() new Entry { LexemeForm = { { "en", "123" } }, }, new Entry { LexemeForm = { { "en", "456" } }, } ]; + _morphTypes = new MorphType[] { new() { Id = Guid.NewGuid(), Kind = MorphTypeKind.Stem, Name = { ["en"] = "Stem" }, SecondaryOrder = 1 } }.AsQueryable(); } [Theory] @@ -36,7 +37,7 @@ public void WhereExemplar_CompiledFilter_ShouldReturnSameResults(string exemplar [InlineData("9")] public void SearchFilter_CompiledFilter_ShouldReturnSameResults(string query) { - var expected = _entries.AsQueryable().Where(Filtering.SearchFilter(query)).ToList(); + var expected = Filtering.SearchFilter(_entries.AsQueryable(), _morphTypes, query).ToList(); var actual = _entries.Where(Filtering.CompiledFilter(query, "en", null)).ToList(); @@ -52,9 +53,8 @@ public void CombinedFilter_CompiledFilter_ShouldReturnSameResults(string exempla { WritingSystemId ws = "en"; - var expected = _entries.AsQueryable() - .WhereExemplar(ws, exemplar) - .Where(Filtering.SearchFilter(query)) + var expected = Filtering.SearchFilter( + _entries.AsQueryable().WhereExemplar(ws, exemplar), _morphTypes, query) .ToList(); var actual = _entries.Where(Filtering.CompiledFilter(query, ws, exemplar)).ToList(); diff --git a/backend/FwLite/LcmCrdt.Tests/FullTextSearch/EntrySearchServiceTests.SearchTableIsUpdatedAutomaticallyOnInsert.verified.txt b/backend/FwLite/LcmCrdt.Tests/FullTextSearch/EntrySearchServiceTests.SearchTableIsUpdatedAutomaticallyOnInsert.verified.txt index c9a939d612..f9a2a3165b 100644 --- a/backend/FwLite/LcmCrdt.Tests/FullTextSearch/EntrySearchServiceTests.SearchTableIsUpdatedAutomaticallyOnInsert.verified.txt +++ b/backend/FwLite/LcmCrdt.Tests/FullTextSearch/EntrySearchServiceTests.SearchTableIsUpdatedAutomaticallyOnInsert.verified.txt @@ -1,6 +1,6 @@ { Id: Guid_1, - Headword: citation1, + Headword: citation1 fr_citation1, CitationForm: citation1 fr_citation1, LexemeForm: lexemeform1 fr_lexemeform1, Gloss: gloss1 es_gloss1 es_gloss2, diff --git a/backend/FwLite/LcmCrdt.Tests/FullTextSearch/EntrySearchServiceTests.SearchTableIsUpdatedAutomaticallyOnUpdate.verified.txt b/backend/FwLite/LcmCrdt.Tests/FullTextSearch/EntrySearchServiceTests.SearchTableIsUpdatedAutomaticallyOnUpdate.verified.txt index 1a2b5bf49e..89d2271942 100644 --- a/backend/FwLite/LcmCrdt.Tests/FullTextSearch/EntrySearchServiceTests.SearchTableIsUpdatedAutomaticallyOnUpdate.verified.txt +++ b/backend/FwLite/LcmCrdt.Tests/FullTextSearch/EntrySearchServiceTests.SearchTableIsUpdatedAutomaticallyOnUpdate.verified.txt @@ -1,7 +1,7 @@ [ { Id: Guid_1, - Headword: citation1, + Headword: citation1 fr_citation1, CitationForm: citation1 fr_citation1, LexemeForm: lexemeform2 fr_lexemeform1, Gloss: , diff --git a/backend/FwLite/LcmCrdt.Tests/FullTextSearch/EntrySearchServiceTests.cs b/backend/FwLite/LcmCrdt.Tests/FullTextSearch/EntrySearchServiceTests.cs index 4ad4984328..0f04310514 100644 --- a/backend/FwLite/LcmCrdt.Tests/FullTextSearch/EntrySearchServiceTests.cs +++ b/backend/FwLite/LcmCrdt.Tests/FullTextSearch/EntrySearchServiceTests.cs @@ -250,22 +250,23 @@ public async Task RanksResultsByColumn() var definition = Guid.NewGuid(); //only en is used for the headword await _service.UpdateEntrySearchTable(new Entry() { Id = headword, LexemeForm = { { "en", word } } }); - //using fr ensures that this value doesn't show up in the headword - await _service.UpdateEntrySearchTable(new Entry() { Id = citationForm, CitationForm = { { "fr", word } } }); - await _service.UpdateEntrySearchTable(new Entry() { Id = lexemeForm, LexemeForm = { { "fr", word } } }); + //equivalent to headword + await _service.UpdateEntrySearchTable(new Entry() { Id = citationForm, CitationForm = { { "en", word } } }); + //using citation form ensures the matching lexeme-form isn't in the headword + await _service.UpdateEntrySearchTable(new Entry() { Id = lexemeForm, LexemeForm = { { "en", word } }, CitationForm = { { "en", "❌" } } }); await _service.UpdateEntrySearchTable(new Entry() { Id = definition, Senses = { new Sense() { Definition = { { "en", new RichString(word, "en") } } } } }); await _service.UpdateEntrySearchTable(new Entry() { Id = gloss, Senses = { new Sense() { Gloss = { { "en", word } } } } }); var result = await _service.Search(word).ToArrayAsync(); result.Select(e => Named(e.Id)).Should() - .Equal(["headword", "citation", "lexemeform", "gloss", "definition"]); + .Equal(["headword", "headword", "lexemeform", "gloss", "definition"]); string Named(Guid id) { return id switch { _ when id == headword => "headword", - _ when id == citationForm => "citation", + _ when id == citationForm => "headword", // always used as headword _ when id == lexemeForm => "lexemeform", _ when id == gloss => "gloss", _ when id == definition => "definition", diff --git a/backend/FwLite/LcmCrdt.Tests/MiniLcmTests/SortingTests.cs b/backend/FwLite/LcmCrdt.Tests/MiniLcmTests/SortingTests.cs index 0f5abb8e73..e32b8f355e 100644 --- a/backend/FwLite/LcmCrdt.Tests/MiniLcmTests/SortingTests.cs +++ b/backend/FwLite/LcmCrdt.Tests/MiniLcmTests/SortingTests.cs @@ -16,4 +16,39 @@ public override async Task DisposeAsync() await base.DisposeAsync(); await _fixture.DisposeAsync(); } + + [Theory] + [InlineData("aaaa", SortField.Headword)] // FTS + [InlineData("a", SortField.Headword)] // non-FTS + [InlineData("aaaa", SortField.SearchRelevance)] // FTS + [InlineData("a", SortField.SearchRelevance)] // non-FTS + public async Task SecondaryOrder_DefaultsToStem(string query, SortField sortField) + { + MorphType[] morphTypes = [ + new() { Id = Guid.NewGuid(), Kind = MorphTypeKind.Stem, Name = { ["en"] = "Stem" }, SecondaryOrder = 1 }, + new() { Id = Guid.NewGuid(), Kind = MorphTypeKind.BoundStem, Name = { ["en"] = "BoundStem" }, SecondaryOrder = 2 }, + new() { Id = Guid.NewGuid(), Kind = MorphTypeKind.Suffix, Name = { ["en"] = "Suffix" }, Postfix = "-", SecondaryOrder = 6 }, + ]; + + foreach (var morphType in morphTypes) + await Api.CreateMorphType(morphType); + + Entry[] expected = [ + new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.Unknown }, // SecondaryOrder defaults to Stem = 1 + new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.BoundStem }, // SecondaryOrder = 2 + new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.Suffix }, // SecondaryOrder = 6 + ]; + + var ids = expected.Select(e => e.Id).ToHashSet(); + + foreach (var entry in Faker.Faker.Random.Shuffle(expected)) + await Api.CreateEntry(entry); + + var results = (await Api.SearchEntries(query, new(new(sortField))).ToArrayAsync()) + .Where(e => ids.Contains(e.Id)) + .ToList(); + + results.Should().BeEquivalentTo(expected, + options => options.WithStrictOrdering()); + } } diff --git a/backend/FwLite/LcmCrdt/Data/EntryQueryHelpers.cs b/backend/FwLite/LcmCrdt/Data/EntryQueryHelpers.cs index fdbddb4264..4f07794242 100644 --- a/backend/FwLite/LcmCrdt/Data/EntryQueryHelpers.cs +++ b/backend/FwLite/LcmCrdt/Data/EntryQueryHelpers.cs @@ -8,13 +8,88 @@ public static class EntryQueryHelpers [ExpressionMethod(nameof(HeadwordExpression))] public static string Headword(this Entry e, WritingSystemId ws) { - var word = e.CitationForm[ws]; - if (string.IsNullOrEmpty(word)) word = e.LexemeForm[ws]; - return word.Trim(); + var citation = e.CitationForm[ws]?.Trim(); + if (!string.IsNullOrEmpty(citation)) return citation; + return e.LexemeForm[ws]?.Trim() ?? string.Empty; } private static Expression> HeadwordExpression() => - (e, ws) => (string.IsNullOrEmpty(Json.Value(e.CitationForm, ms => ms[ws])) - ? Json.Value(e.LexemeForm, ms => ms[ws]) - : Json.Value(e.CitationForm, ms => ms[ws]))!.Trim(); + (e, ws) => string.IsNullOrEmpty((Json.Value(e.CitationForm, ms => ms[ws]) ?? "").Trim()) + ? (Json.Value(e.LexemeForm, ms => ms[ws]) ?? "").Trim() + : (Json.Value(e.CitationForm, ms => ms[ws]) ?? "").Trim(); + + [ExpressionMethod(nameof(HeadwordWithTokensExpression))] + public static string HeadwordWithTokens(this Entry e, WritingSystemId ws, string? leading, string? trailing) + { + var citation = e.CitationForm[ws]?.Trim(); + if (!string.IsNullOrEmpty(citation)) return citation; + var lexeme = e.LexemeForm[ws]?.Trim(); + if (string.IsNullOrEmpty(lexeme)) return string.Empty; + return ((leading ?? "") + lexeme + (trailing ?? "")).Trim(); + } + + private static Expression> HeadwordWithTokensExpression() => + (e, ws, leading, trailing) => + string.IsNullOrEmpty((Json.Value(e.CitationForm, ms => ms[ws]) ?? "").Trim()) + ? string.IsNullOrEmpty((Json.Value(e.LexemeForm, ms => ms[ws]) ?? "").Trim()) + ? "" + : ((leading ?? "") + (Json.Value(e.LexemeForm, ms => ms[ws]) ?? "").Trim() + (trailing ?? "")).Trim() + : Json.Value(e.CitationForm, ms => ms[ws])!.Trim(); + + [ExpressionMethod(nameof(SearchHeadwords))] + public static bool SearchHeadwords(this Entry e, string? leading, string? trailing, string query) + { + return e.CitationForm.SearchValue(query) + || e.LexemeForm.Values.Any(kvp => + string.IsNullOrEmpty(e.CitationForm[kvp.Key]?.Trim()) && + SqlHelpers.ContainsIgnoreCaseAccents((leading ?? "") + (kvp.Value?.Trim() ?? "") + (trailing ?? ""), query)); + } + + private static Expression> SearchHeadwords() + { + return (e, leading, trailing, query) => + Json.QueryValues(e.CitationForm).Any( + v => SqlHelpers.ContainsIgnoreCaseAccents(v, query)) || + Json.QueryEntries(e.LexemeForm).Any(kv => + string.IsNullOrEmpty((Json.Value(e.CitationForm, ms => ms[kv.Key]) ?? "").Trim()) && + SqlHelpers.ContainsIgnoreCaseAccents((leading ?? "") + kv.Value + (trailing ?? ""), query)); + } + + + /// + /// Computes headwords for all writing systems present in CitationForm or LexemeForm, + /// applying morph tokens when CitationForm is absent. + /// Used for in-memory population of Entry.Headword after loading from DB. + /// + public static MultiString ComputeHeadwords(Entry entry, + IReadOnlyDictionary morphTypeDataLookup) + { + var result = new MultiString(); + morphTypeDataLookup.TryGetValue(entry.MorphType, out var morphData); + + // Iterate all WS keys that have data, not just "current" vernacular WSs, + // so we don't lose headwords for non-current or future writing systems. + var wsIds = entry.CitationForm.Values.Keys + .Union(entry.LexemeForm.Values.Keys); + + foreach (var wsId in wsIds) + { + var citation = entry.CitationForm[wsId]?.Trim(); + if (!string.IsNullOrEmpty(citation)) + { + result[wsId] = citation; + continue; + } + + var lexeme = entry.LexemeForm[wsId]?.Trim(); + if (!string.IsNullOrEmpty(lexeme)) + { + var leading = morphData?.Prefix ?? ""; + var trailing = morphData?.Postfix ?? ""; + result[wsId] = (leading + lexeme + trailing).Trim(); + } + } + + return result; + } } diff --git a/backend/FwLite/LcmCrdt/Data/Filtering.cs b/backend/FwLite/LcmCrdt/Data/Filtering.cs index d2fc9b45cd..638dc29742 100644 --- a/backend/FwLite/LcmCrdt/Data/Filtering.cs +++ b/backend/FwLite/LcmCrdt/Data/Filtering.cs @@ -15,11 +15,15 @@ public static IQueryable WhereExemplar( return query.Where(e => e.Headword(ws).StartsWith(exemplar)); } - public static Expression> SearchFilter(string query) + public static IQueryable SearchFilter(IQueryable entries, IQueryable morphTypes, string query) { - return e => e.LexemeForm.SearchValue(query) - || e.CitationForm.SearchValue(query) - || e.Senses.Any(s => s.Gloss.SearchValue(query)); + return from entry in entries + join mt in morphTypes on entry.MorphType equals mt.Kind into mtGroup + from mt in mtGroup.DefaultIfEmpty() + where entry.SearchHeadwords(mt.Prefix, mt.Postfix, query) // CitationForm.SearchValue would be redundant + || entry.LexemeForm.SearchValue(query) + || entry.Senses.Any(s => s.Gloss.SearchValue(query)) + select entry; } public static Expression> FtsFilter(string query, IQueryable diff --git a/backend/FwLite/LcmCrdt/Data/MiniLcmRepository.cs b/backend/FwLite/LcmCrdt/Data/MiniLcmRepository.cs index 44a92673a4..59cfbca991 100644 --- a/backend/FwLite/LcmCrdt/Data/MiniLcmRepository.cs +++ b/backend/FwLite/LcmCrdt/Data/MiniLcmRepository.cs @@ -200,20 +200,24 @@ private async Task EnsureWritingSystemIsPopulated(QueryOptions que { if (SearchService is not null && SearchService.ValidSearchTerm(query)) { + var morphTypes = await dbContext.MorphTypes.ToArrayAsyncEF(); if (sortOptions is not null && sortOptions.Field == SortField.SearchRelevance) { //ranking must be done at the same time as part of the full-text search, so we can't use normal sorting sortingHandled = true; - queryable = SearchService.FilterAndRank(queryable, query, sortOptions.WritingSystem); + queryable = SearchService.FilterAndRank(queryable, query, sortOptions.WritingSystem, morphTypes); } else { - queryable = SearchService.Filter(queryable, query); + var filterWs = sortOptions?.WritingSystem + ?? (await GetWritingSystem(default, WritingSystemType.Vernacular))?.WsId + ?? default; + queryable = SearchService.Filter(queryable, query, filterWs, morphTypes); } } else { - queryable = queryable.Where(Filtering.SearchFilter(query)); + queryable = Filtering.SearchFilter(queryable, dbContext.GetTable(), query); } } @@ -225,12 +229,10 @@ private ValueTask> ApplySorting(IQueryable queryable, Q if (options.Order.WritingSystem == default) throw new ArgumentException("Sorting writing system must be specified", nameof(options)); - var wsId = options.Order.WritingSystem; - IQueryable result = options.Order.Field switch + var result = options.Order.Field switch { - SortField.SearchRelevance => queryable.ApplyRoughBestMatchOrder(options.Order, query), - SortField.Headword => - options.ApplyOrder(queryable, e => e.Headword(wsId).CollateUnicode(wsId)).ThenBy(e => e.Id), + SortField.SearchRelevance => queryable.ApplyRoughBestMatchOrder(dbContext.GetTable(), options.Order, query), + SortField.Headword => queryable.ApplyHeadwordOrder(dbContext.GetTable(), options.Order), _ => throw new ArgumentOutOfRangeException(nameof(options), "sort field unknown " + options.Order.Field) }; return new ValueTask>(result); diff --git a/backend/FwLite/LcmCrdt/Data/Sorting.cs b/backend/FwLite/LcmCrdt/Data/Sorting.cs index 957fd4b978..acb0690737 100644 --- a/backend/FwLite/LcmCrdt/Data/Sorting.cs +++ b/backend/FwLite/LcmCrdt/Data/Sorting.cs @@ -1,31 +1,77 @@ +using LinqToDB; + namespace LcmCrdt.Data; public static class Sorting { + public static IQueryable ApplyHeadwordOrder(this IQueryable entries, ITable morphTypes, SortOptions order, string? query = null) + { + var stemOrder = morphTypes.Where(m => m.Kind == MorphTypeKind.Stem).Select(m => m.SecondaryOrder); + if (order.Ascending) + { + return + from entry in entries + orderby + entry.Headword(order.WritingSystem).CollateUnicode(order.WritingSystem), + morphTypes.Where(m => m.Kind == entry.MorphType) + .Select(m => (int?)m.SecondaryOrder).FirstOrDefault() ?? stemOrder.FirstOrDefault(), + // entry.HomographNumber, + entry.Id + select entry; + } + else + { + return + from entry in entries + orderby + entry.Headword(order.WritingSystem).CollateUnicode(order.WritingSystem) descending, + (morphTypes.Where(m => m.Kind == entry.MorphType) + .Select(m => (int?)m.SecondaryOrder).FirstOrDefault() ?? stemOrder.FirstOrDefault()) descending, + // entry.HomographNumber descending, + entry.Id descending + select entry; + } + } + /// /// Rough search relevance for when FTS is unavailable. Headword matches come first, preferring /// prefix matches (e.g. when searching "tan" then "tanan" is before "matan"), then shorter, then alphabetical. /// See also: for the FTS-based equivalent. /// - public static IQueryable ApplyRoughBestMatchOrder(this IQueryable entries, SortOptions order, string? query = null) + public static IQueryable ApplyRoughBestMatchOrder(this IQueryable entries, ITable morphTypes, SortOptions order, string? query = null) { + var stemOrder = morphTypes.Where(m => m.Kind == MorphTypeKind.Stem).Select(m => m.SecondaryOrder); if (order.Ascending) { - return entries - .OrderByDescending(e => !string.IsNullOrEmpty(query) && SqlHelpers.ContainsIgnoreCaseAccents(e.Headword(order.WritingSystem), query!)) - .ThenByDescending(e => !string.IsNullOrEmpty(query) && SqlHelpers.StartsWithIgnoreCaseAccents(e.Headword(order.WritingSystem), query!)) - .ThenBy(e => e.Headword(order.WritingSystem).Length) - .ThenBy(e => e.Headword(order.WritingSystem)) - .ThenBy(e => e.Id); + return + from e in entries + join mt in morphTypes on e.MorphType equals mt.Kind into mtGroup + from mt in mtGroup.DefaultIfEmpty() + orderby + !string.IsNullOrEmpty(query) && SqlHelpers.StartsWithIgnoreCaseAccents(e.Headword(order.WritingSystem), query!) descending, + !string.IsNullOrEmpty(query) && SqlHelpers.ContainsIgnoreCaseAccents(e.Headword(order.WritingSystem), query!) descending, + e.Headword(order.WritingSystem).Length, + e.Headword(order.WritingSystem), + mt != null ? mt.SecondaryOrder : stemOrder.FirstOrDefault(), + // e.HomographNumber, + e.Id + select e; } else { - return entries - .OrderBy(e => !string.IsNullOrEmpty(query) && SqlHelpers.ContainsIgnoreCaseAccents(e.Headword(order.WritingSystem), query!)) - .ThenBy(e => !string.IsNullOrEmpty(query) && SqlHelpers.StartsWithIgnoreCaseAccents(e.Headword(order.WritingSystem), query!)) - .ThenByDescending(e => e.Headword(order.WritingSystem).Length) - .ThenByDescending(e => e.Headword(order.WritingSystem)) - .ThenByDescending(e => e.Id); + return + from e in entries + join mt in morphTypes on e.MorphType equals mt.Kind into mtGroup + from mt in mtGroup.DefaultIfEmpty() + orderby + !string.IsNullOrEmpty(query) && SqlHelpers.StartsWithIgnoreCaseAccents(e.Headword(order.WritingSystem), query!), + !string.IsNullOrEmpty(query) && SqlHelpers.ContainsIgnoreCaseAccents(e.Headword(order.WritingSystem), query!), + e.Headword(order.WritingSystem).Length descending, + e.Headword(order.WritingSystem) descending, + (mt != null ? mt.SecondaryOrder : stemOrder.FirstOrDefault()) descending, + // e.HomographNumber descending, + e.Id descending + select e; } } } diff --git a/backend/FwLite/LcmCrdt/FullTextSearch/EntrySearchService.cs b/backend/FwLite/LcmCrdt/FullTextSearch/EntrySearchService.cs index 64d5bdaa89..40ba059e58 100644 --- a/backend/FwLite/LcmCrdt/FullTextSearch/EntrySearchService.cs +++ b/backend/FwLite/LcmCrdt/FullTextSearch/EntrySearchService.cs @@ -29,9 +29,9 @@ public class EntrySearchService(LcmCrdtDbContext dbContext, ILogger EntrySearchRecordsTable => dbContext.GetTable(); - public IQueryable Filter(IQueryable queryable, string query) + public IQueryable Filter(IQueryable queryable, string query, WritingSystemId wsId, MorphType[] morphTypes) { - return FilterInternal(queryable, query).Select(t => t.Entry); + return FilterInternal(queryable, query, wsId, morphTypes).Select(t => t.Entry); } /// @@ -42,39 +42,85 @@ public IQueryable Filter(IQueryable queryable, string query) /// public IQueryable FilterAndRank(IQueryable queryable, string query, - WritingSystemId wsId) + WritingSystemId wsId, + MorphType[] morphTypes) { - var filtered = FilterInternal(queryable, query); + var morphTypeTable = dbContext.GetTable(); + var filtered = FilterInternal(queryable, query, wsId, morphTypes); var ordered = filtered - .OrderByDescending(t => t.HeadwordMatches) + .OrderByDescending(t => t.HeadwordMatches ? 0 : Sql.Ext.SQLite().Rank(t.SearchRecord)) .ThenByDescending(t => t.HeadwordPrefixMatches) - .ThenBy(t => t.HeadwordMatches ? t.SearchRecord.Headword.Length : int.MaxValue) - .ThenBy(t => - t.HeadwordMatches - ? t.SearchRecord.Headword.CollateUnicode(wsId) - : string.Empty) - .ThenBy(t => Sql.Ext.SQLite().Rank(t.SearchRecord)).ThenBy(t => t.Entry.Id); + .ThenBy(t => t.Headword.Length) + .ThenBy(t => t.Headword.CollateUnicode(wsId)) + .ThenBy(t => t.HeadwordMatches + ? morphTypeTable.Where(mt => mt.Kind == t.Entry.MorphType || mt.Kind == MorphTypeKind.Stem) + .OrderBy(mt => mt.Kind == MorphTypeKind.Stem ? 1 : 0) // stem is the fallback, so it should come last + .Select(mt => mt.SecondaryOrder).FirstOrDefault() + : int.MaxValue) + // .ThenBy(t => t.Entry.HomographNumber) + .ThenBy(t => t.Entry.Id); return ordered.Select(t => t.Entry); } - private sealed record FilterProjection(Entry Entry, EntrySearchRecord SearchRecord, bool HeadwordMatches, bool HeadwordPrefixMatches); + private sealed record FilterProjection(Entry Entry, EntrySearchRecord SearchRecord, string Headword, bool HeadwordMatches, bool HeadwordPrefixMatches); - private IQueryable FilterInternal(IQueryable queryable, string query) + private IQueryable FilterInternal(IQueryable queryable, string query, WritingSystemId wsId, MorphType[] morphTypes) { var ftsString = ToFts5LiteralString(query); + var queryWithoutMorphTokens = StripMorphTokens(query, morphTypes); - //starting from EntrySearchRecordsTable rather than queryable otherwise linq2db loses track of the table return from searchRecord in EntrySearchRecordsTable from entry in queryable.InnerJoin(r => r.Id == searchRecord.Id) where Sql.Ext.SQLite().Match(searchRecord, ftsString) && - (entry.LexemeForm.SearchValue(query) + (entry.LexemeForm.SearchValue(queryWithoutMorphTokens) || entry.CitationForm.SearchValue(query) - || entry.Senses.Any(s => s.Gloss.SearchValue(query))) - let headwordMatches = SqlHelpers.ContainsIgnoreCaseAccents(searchRecord.Headword, query) - let headwordPrefixMatches = SqlHelpers.StartsWithIgnoreCaseAccents(searchRecord.Headword, query) - select new FilterProjection(entry, searchRecord, headwordMatches, headwordPrefixMatches); + || entry.Senses.Any(s => s.Gloss.SearchValue(query)) + || SqlHelpers.ContainsIgnoreCaseAccents(entry.Headword(wsId), query)) + // this does not include morph tokens, which is actually what we want. Morph-tokens should not affect sorting. + // If the user uses a citation form with morph tokens, then oh well. Not even FLEx strips the morph-tokens before sorting in that case. + let headword = entry.Headword(wsId) + let headwordQuery = string.IsNullOrEmpty((Json.Value(entry.CitationForm, ms => ms[wsId]) ?? "").Trim()) + ? queryWithoutMorphTokens : query + let headwordMatches = SqlHelpers.ContainsIgnoreCaseAccents(headword, headwordQuery) + let headwordPrefixMatches = SqlHelpers.StartsWithIgnoreCaseAccents(headword, headwordQuery) + select new FilterProjection(entry, searchRecord, headword, headwordMatches, headwordPrefixMatches); + } + + private static string StripMorphTokens(string input, MorphType[] morphTypes) + { + if (string.IsNullOrEmpty(input)) return input; + + var bestMatchScore = 0; + MorphType? bestMorphTypeMatch = null; + + foreach (var morphType in morphTypes) + { + var currMatchScore = 0; + if (morphType.Prefix is not null && input.StartsWith(morphType.Prefix)) + currMatchScore += 2; // prefer leading tokens + if (morphType.Postfix is not null && input.EndsWith(morphType.Postfix)) + currMatchScore += 1; + + if (currMatchScore > bestMatchScore) + { + bestMorphTypeMatch = morphType; + bestMatchScore = currMatchScore; + } + } + + if (bestMorphTypeMatch is not null) + { + var result = input; + if (bestMorphTypeMatch.Prefix is not null && input.StartsWith(bestMorphTypeMatch.Prefix)) + result = result[bestMorphTypeMatch.Prefix.Length..]; + if (bestMorphTypeMatch.Postfix is not null && input.EndsWith(bestMorphTypeMatch.Postfix)) + result = result[..^bestMorphTypeMatch.Postfix.Length]; + return result; + } + + return input; } private static string ToFts5LiteralString(string query) @@ -166,7 +212,8 @@ public async Task UpdateEntrySearchTable(Guid entryId) public async Task UpdateEntrySearchTable(Entry entry) { var writingSystems = await dbContext.WritingSystemsOrdered.ToArrayAsync(); - var record = ToEntrySearchRecord(entry, writingSystems); + var morphTypeDataLookup = await dbContext.MorphTypes.ToDictionaryAsync(m => m.Kind); + var record = ToEntrySearchRecord(entry, writingSystems, morphTypeDataLookup); await InsertOrUpdateEntrySearchRecord(record, EntrySearchRecordsTable); } @@ -214,7 +261,8 @@ public static async Task UpdateEntrySearchTable(IEnumerable entries, return ws1.Id.CompareTo(ws2.Id); }); var entrySearchRecordsTable = dbContext.GetTable(); - var searchRecords = entries.Select(entry => ToEntrySearchRecord(entry, writingSystems)); + var morphTypeDataLookup = await dbContext.MorphTypes.ToDictionaryAsync(m => m.Kind); + var searchRecords = entries.Select(entry => ToEntrySearchRecord(entry, writingSystems, morphTypeDataLookup)); foreach (var entrySearchRecord in searchRecords) { //can't use bulk copy here because that creates duplicate rows @@ -232,11 +280,12 @@ public async Task RegenerateEntrySearchTable() await EntrySearchRecordsTable.TruncateAsync(); var writingSystems = await dbContext.WritingSystemsOrdered.ToArrayAsync(); + var morphTypeDataLookup = await dbContext.MorphTypes.ToDictionaryAsync(m => m.Kind); await EntrySearchRecordsTable .BulkCopyAsync(dbContext.Set() .LoadWith(e => e.Senses) .AsQueryable() - .Select(entry => ToEntrySearchRecord(entry, writingSystems)) + .Select(entry => ToEntrySearchRecord(entry, writingSystems, morphTypeDataLookup)) .AsAsyncEnumerable()); await transaction.CommitAsync(); } @@ -256,12 +305,21 @@ private async Task HasMissingEntries() return await EntrySearchRecordsTable.CountAsync() != await dbContext.Set().CountAsync(); } - private static EntrySearchRecord ToEntrySearchRecord(Entry entry, WritingSystem[] writingSystems) + private static EntrySearchRecord ToEntrySearchRecord(Entry entry, WritingSystem[] writingSystems, + IReadOnlyDictionary morphTypeDataLookup) { + // Include headwords (with morph tokens) for ALL vernacular writing systems (space-separated). + // This ensures FTS matches across all WS, including morph-token-decorated forms. + var headwords = EntryQueryHelpers.ComputeHeadwords(entry, morphTypeDataLookup); + var headword = string.Join(" ", + writingSystems.Where(ws => ws.Type == WritingSystemType.Vernacular) + .Select(ws => headwords[ws.WsId]) + .Where(h => !string.IsNullOrEmpty(h))); + return new EntrySearchRecord() { Id = entry.Id, - Headword = entry.Headword(writingSystems.First(ws => ws.Type == WritingSystemType.Vernacular).WsId), + Headword = headword, LexemeForm = LexemeForm(writingSystems, entry), CitationForm = CitationForm(writingSystems, entry), Definition = Definition(writingSystems, entry), diff --git a/backend/FwLite/LcmCrdt/Json.cs b/backend/FwLite/LcmCrdt/Json.cs index ad9bebf943..71dfe76294 100644 --- a/backend/FwLite/LcmCrdt/Json.cs +++ b/backend/FwLite/LcmCrdt/Json.cs @@ -164,6 +164,15 @@ private static Expression>> QueryExpression return (values) => values.QueryInternal().Select(v => v.Value); } + [ExpressionMethod(nameof(QueryEntriesExpressionMultiString))] + internal static IQueryable> QueryEntries(MultiString values) + { + return values.Values.Select(kv => new JsonEach(kv.Value, kv.Key.Code, "", 0, "", "")).AsQueryable(); + } + + private static Expression>>> QueryEntriesExpressionMultiString() => + (values) => values.QueryInternal(); + //indicates that linq2db should rewrite Sense.SemanticDomains.Query(d => d.Code) //into code in QueryExpression: Sense.SemanticDomains.QueryInternal().Select(v => Sql.Value(v.Value, d => d.Code)) [ExpressionMethod(nameof(QuerySelectExpression))] @@ -204,7 +213,7 @@ public static string ToString(Guid? guid) } //maps to a row from json_each - private record JsonEach( + internal record JsonEach( [property: Column("value")] T Value, [property: Column("key")] string Key, [property: Column("type")] string Type, diff --git a/backend/FwLite/LcmCrdt/QueryHelpers.cs b/backend/FwLite/LcmCrdt/QueryHelpers.cs index 57753f4f31..37e151c271 100644 --- a/backend/FwLite/LcmCrdt/QueryHelpers.cs +++ b/backend/FwLite/LcmCrdt/QueryHelpers.cs @@ -4,7 +4,8 @@ namespace LcmCrdt; public static class QueryHelpers { - public static void Finalize(this Entry entry, IComparer complexFormComparer) + public static void Finalize(this Entry entry, + IComparer complexFormComparer) { entry.Senses.ApplySortOrder(); entry.Components.ApplySortOrder(); diff --git a/backend/FwLite/LcmCrdt/SqlHelpers.cs b/backend/FwLite/LcmCrdt/SqlHelpers.cs index d14a6b1876..4b2d4fe0d5 100644 --- a/backend/FwLite/LcmCrdt/SqlHelpers.cs +++ b/backend/FwLite/LcmCrdt/SqlHelpers.cs @@ -36,4 +36,7 @@ private static Expression> SearchValueExpression [Sql.Expression(CustomSqliteFunctionInterceptor.StartsWithFunction + "({0}, {1})")] public static bool StartsWithIgnoreCaseAccents(string s, string search) => s.StartsWithDiacriticMatch(search); + + [Sql.Expression("({0} || {1} || {2})", PreferServerSide = true)] + public static string ConcatTokens(string leading, string value, string trailing) => leading + value + trailing; } diff --git a/backend/FwLite/MiniLcm.Tests/QueryEntryTestsBase.cs b/backend/FwLite/MiniLcm.Tests/QueryEntryTestsBase.cs index 801235e33a..2481fae96b 100644 --- a/backend/FwLite/MiniLcm.Tests/QueryEntryTestsBase.cs +++ b/backend/FwLite/MiniLcm.Tests/QueryEntryTestsBase.cs @@ -388,25 +388,25 @@ public async Task CanFilterToExampleSentenceWithMissingSentence() [Theory] [InlineData("a", "a", true)] - [InlineData("a", "A", false)] - [InlineData("A", "Ã", false)] - [InlineData("ap", "apple", false)] - [InlineData("ap", "APPLE", false)] - [InlineData("ing", "walking", false)] - [InlineData("ing", "WALKING", false)] - [InlineData("Ãp", "Ãpple", false)] - [InlineData("Ãp", "ãpple", false)] - [InlineData("ap", "Ãpple", false)] - [InlineData("app", "Ãpple", false)]//crdt fts only kicks in at 3 chars - [InlineData("й", "й", false)] // D, C - [InlineData("й", "й", false)] // C, D + [InlineData("a", "A")] + [InlineData("A", "Ã")] + [InlineData("ap", "apple")] + [InlineData("ap", "APPLE")] + [InlineData("ing", "walking")] + [InlineData("ing", "WALKING")] + [InlineData("Ãp", "Ãpple")] + [InlineData("Ãp", "ãpple")] + [InlineData("ap", "Ãpple")] + [InlineData("app", "Ãpple")]//crdt fts only kicks in at 3 chars + [InlineData("й", "й")] // D, C + [InlineData("й", "й")] // C, D [InlineData("й", "й", true)] // C, C [InlineData("й", "й", true)] // D, D - [InlineData("ймыл", "ймыл", false)] // D, C - [InlineData("ймыл", "ймыл", false)] // C, D + [InlineData("ймыл", "ймыл")] // D, C + [InlineData("ймыл", "ймыл")] // C, D [InlineData("ймыл", "ймыл", true)] // C, C [InlineData("ймыл", "ймыл", true)] // D, D - public async Task SuccessfulMatches(string searchTerm, string word, bool identical) + public async Task SuccessfulMatches(string searchTerm, string word, bool identical = false) { // identical is to make the test cases more readable when they only differ in their normalization (searchTerm == word).Should().Be(identical); @@ -515,6 +515,135 @@ public async Task PunctuationWorks(string searchTerm, string word) var results = await Api.SearchEntries(searchTerm).Select(e => e.LexemeForm["en"]).ToArrayAsync(); results.Should().Contain(word); } + + // This test guards against the mistake of only matching on headword + [Theory] + [InlineData("mango")] // FTS + [InlineData("m")] // non-FTS + public async Task SearchEntries_MatchesLexeme(string searchTerm) + { + var prefixQuery = $"{searchTerm}-"; + await Api.CreateMorphType(new MorphType + { + Id = Guid.NewGuid(), + Kind = MorphTypeKind.Prefix, + Name = { ["en"] = "Prefix" }, + Postfix = "-", + SecondaryOrder = 3 + }); + var lexemeOnlyMatchEntry = await Api.CreateEntry(new Entry + { + LexemeForm = { ["en"] = "mango" }, + CitationForm = { ["en"] = "zzzzzzzz" }, + MorphType = MorphTypeKind.Stem, + }); + var lexemeOnlyMatchWithMorphToken = await Api.CreateEntry(new Entry + { + LexemeForm = { ["en"] = "mango" }, + CitationForm = { ["en"] = "zzzzzzzz" }, + MorphType = MorphTypeKind.Prefix, + }); + var entries = await Api.SearchEntries(searchTerm).ToArrayAsync(); + entries.Should().Contain(e => e.Id == lexemeOnlyMatchEntry.Id); + entries.Should().Contain(e => e.Id == lexemeOnlyMatchWithMorphToken.Id); + } + + [Theory] + [InlineData("mango-")] // FTS + [InlineData("o-")] // non-FTS + public async Task SearchEntries_CitationFormOverridesMorphTokens(string searchTerm) + { + var prefixQuery = $"{searchTerm}-"; + await Api.CreateMorphType(new MorphType + { + Id = Guid.NewGuid(), + Kind = MorphTypeKind.Prefix, + Name = { ["en"] = "Prefix" }, + Postfix = "-", + SecondaryOrder = 3 + }); + var entryWithOverriddenMorphToken = await Api.CreateEntry(new Entry + { + LexemeForm = { ["en"] = "mango" }, + // citation form overrides "mango-" + CitationForm = { ["en"] = "zzzzzzzz" }, + MorphType = MorphTypeKind.Prefix, + }); + var entries = await Api.SearchEntries(searchTerm).ToArrayAsync(); + entries.Should().NotContain(e => e.Id == entryWithOverriddenMorphToken.Id); + } + + [Theory] + [InlineData("mango-")] // FTS + [InlineData("o-")] // non-FTS + public async Task MorphTokenSearch_FindsPrefixEntry(string searchTerm) + { + await Api.CreateMorphType(new MorphType + { + Id = Guid.NewGuid(), + Kind = MorphTypeKind.Prefix, + Name = { ["en"] = "Prefix" }, + Postfix = "-", + SecondaryOrder = 3 + }); + var id = Guid.NewGuid(); + await Api.CreateEntry(new Entry { Id = id, LexemeForm = { ["en"] = "mango" }, MorphType = MorphTypeKind.Prefix }); + + var results = await Api.SearchEntries(searchTerm).ToArrayAsync(); + results.Should().Contain(e => e.Id == id); + } + + [Theory] + [InlineData("-mango")] // FTS + [InlineData("-m")] // non-FTS + public async Task MorphTokenSearch_FindsSuffixEntry(string searchTerm) + { + await Api.CreateMorphType(new MorphType + { + Id = Guid.NewGuid(), + Kind = MorphTypeKind.Suffix, + Name = { ["en"] = "Suffix" }, + Prefix = "-", + SecondaryOrder = 6 + }); + var id = Guid.NewGuid(); + await Api.CreateEntry(new Entry { Id = id, LexemeForm = { ["en"] = "mango" }, MorphType = MorphTypeKind.Suffix }); + + var results = await Api.SearchEntries(searchTerm).ToArrayAsync(); + results.Should().Contain(e => e.Id == id); + } + + [Fact] + public async Task MorphTokenSearch_DoesNotMatchWithoutToken() + { + await Api.CreateMorphType(new MorphType + { + Id = Guid.NewGuid(), + Kind = MorphTypeKind.Prefix, + Name = { ["en"] = "Prefix" }, + Postfix = "-", + SecondaryOrder = 3 + }); + await Api.CreateEntry(new Entry { LexemeForm = { ["en"] = "mango" }, MorphType = MorphTypeKind.Root }); + + // Searching for "-mango" should NOT match a Root entry (no morph tokens) + var results = await Api.SearchEntries("mango-").Select(e => e.LexemeForm["en"]).ToArrayAsync(); + results.Should().NotContain("mango"); + } + + [Theory] + [InlineData("mango", SortField.Headword)] // FTS + [InlineData("m", SortField.Headword)] // non-FTS + [InlineData("mango", SortField.SearchRelevance)] // FTS + [InlineData("m", SortField.SearchRelevance)] // non-FTS + public async Task SearchEntries_EntryWithNoMorphTypeData_Works(string searchTerm, SortField sortField) + { + // MorphType.Unknown will likely not be included in the morph-type DB-table + var id = Guid.NewGuid(); + await Api.CreateEntry(new Entry { Id = id, LexemeForm = { ["en"] = "mango" }, MorphType = MorphTypeKind.Unknown }); + var results = await Api.SearchEntries(searchTerm, new(new(sortField))).ToArrayAsync(); + results.Should().Contain(e => e.Id == id); + } } // A seperate class to preserve the readability of the results in the main test class diff --git a/backend/FwLite/MiniLcm.Tests/SortingTestsBase.cs b/backend/FwLite/MiniLcm.Tests/SortingTestsBase.cs index 262d6b7ea4..a815324133 100644 --- a/backend/FwLite/MiniLcm.Tests/SortingTestsBase.cs +++ b/backend/FwLite/MiniLcm.Tests/SortingTestsBase.cs @@ -1,19 +1,24 @@ +using MiniLcm.Tests.AutoFakerHelpers; +using Soenneker.Utils.AutoBogus; + namespace MiniLcm.Tests; public abstract class SortingTestsBase : MiniLcmTestBase { + protected static readonly AutoFaker Faker = new(AutoFakerDefault.Config); + private Task CreateEntry(string headword) { return Api.CreateEntry(new() { LexemeForm = { { "en", headword } }, }); } // ReSharper disable InconsistentNaming - const string Ru_A= "\u0410"; - const string Ru_a = "\u0430"; - const string Ru_Б= "\u0411"; - const string Ru_б = "\u0431"; - const string Ru_В= "\u0412"; - const string Ru_в = "\u0432"; + private const string Ru_A = "\u0410"; + private const string Ru_a = "\u0430"; + private const string Ru_Б = "\u0411"; + private const string Ru_б = "\u0431"; + private const string Ru_В = "\u0412"; + private const string Ru_в = "\u0432"; // ReSharper restore InconsistentNaming [Theory] @@ -49,4 +54,270 @@ await Api.CreateWritingSystem(new() await Api.GetEntries(new QueryOptions(new SortOptions(SortField.Headword, wsId))) .ToArrayAsync(); } + + [Theory] + [InlineData("aaaa", SortField.Headword)] // FTS + [InlineData("a", SortField.Headword)] // non-FTS + [InlineData("aaaa", SortField.SearchRelevance)] // FTS + [InlineData("a", SortField.SearchRelevance)] // non-FTS + public async Task MorphTokens_DoNotAffectSortOrder(string query, SortField sortField) + { + MorphType[] morphTypes = [ + new() { Id = Guid.NewGuid(), Kind = MorphTypeKind.Root, Name = { ["en"] = "Root" }, SecondaryOrder = 1 }, + new() { Id = Guid.NewGuid(), Kind = MorphTypeKind.Prefix, Name = { ["en"] = "Prefix" }, Prefix = "-", SecondaryOrder = 3 }, + new() { Id = Guid.NewGuid(), Kind = MorphTypeKind.Suffix, Name = { ["en"] = "Suffix" }, Postfix = "-", SecondaryOrder = 6 }, + ]; + + foreach (var morphType in morphTypes) + await Api.CreateMorphType(morphType); + + // All three entries have LexemeForm "aaaa". Their headwords are: + // Root: "aaaa" (no tokens) + // Prefix: "-aaaa" (leading token "-") + // Suffix: "aaaa-" (trailing token "-") + // Sort order should ignore morph tokens and differentiate only by SecondaryOrder. + Entry[] expected = [ + new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.Root }, // SecondaryOrder = 1 + new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.Prefix }, // SecondaryOrder = 3 + new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.Suffix }, // SecondaryOrder = 6 + ]; + + var ids = expected.Select(e => e.Id).ToHashSet(); + + foreach (var entry in Faker.Faker.Random.Shuffle(expected)) + await Api.CreateEntry(entry); + + var results = (await Api.SearchEntries(query, new(new(sortField))).ToArrayAsync()) + .Where(e => ids.Contains(e.Id)) + .ToList(); + + results.Should().BeEquivalentTo(expected, + options => options.WithStrictOrdering()); + } + + [Theory] + [InlineData("aaaa")] // FTS rank + [InlineData("a")] // non-FTS rank + public async Task SecondaryOrder_Relevance_LexemeForm(string searchTerm) + { + MorphType[] morphTypes = [ + new() { Id = Guid.NewGuid(), Kind = MorphTypeKind.Root, Name = { ["en"] = "Root" }, SecondaryOrder = 1 }, + // new() { Id = Guid.NewGuid(), Kind = MorphTypeKind.Stem, Name = { ["en"] = "Stem" }, SecondaryOrder = 1 }, + new() { Id = Guid.NewGuid(), Kind = MorphTypeKind.BoundRoot, Name = { ["en"] = "BoundRoot" }, SecondaryOrder = 2 }, + // new() { Id = Guid.NewGuid(), Kind = MorphTypeKind.BoundStem, Name = { ["en"] = "BoundStem" }, SecondaryOrder = 2 }, + ]; + + foreach (var morphType in morphTypes) + await Api.CreateMorphType(morphType); + + static Entry[] CreateSortedEntrySet(string headword) + { + return [ + // Root/Stem - SecondaryOrder: 1 + new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = headword }, MorphType = MorphTypeKind.Root/*, HomographNumber = 1*/ }, + // new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = lexeme }, MorphType = MorphTypeKind.Stem, HomographNumber = 2 }, + // BoundRoot/BoundStem - SecondaryOrder: 2 + new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = headword }, MorphType = MorphTypeKind.BoundRoot/*, HomographNumber = 1*/ }, + // new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = lexeme }, MorphType = MorphTypeKind.BoundStem, HomographNumber = 2 }, + ]; + } + + var exactMatches = CreateSortedEntrySet("aaaa"); + var firstShortestStartsWithMatches = CreateSortedEntrySet("aaaab"); + var lastShortestStartsWithMatches = CreateSortedEntrySet("aaaac"); + var firstLongestStartsWithMatches = CreateSortedEntrySet("aaaabb"); + var lastLongestStartsWithMatches = CreateSortedEntrySet("aaaacc"); + var firstShortestContainsMatches = CreateSortedEntrySet("baaaa"); + var lastShortestContainsMatches = CreateSortedEntrySet("caaaa"); + var firstLongestContainsMatches = CreateSortedEntrySet("bbaaaa"); + var lastLongestContainsMatches = CreateSortedEntrySet("ccaaaa"); + + var entryId = Guid.NewGuid(); + Entry nonHeadwordMatch = new() { Id = entryId, Senses = [new() { EntryId = entryId, Gloss = { ["en"] = "aaaa" } }] }; + + Entry[] expected = [ + .. exactMatches, + .. firstShortestStartsWithMatches, + .. lastShortestStartsWithMatches, + .. firstLongestStartsWithMatches, + .. lastLongestStartsWithMatches, + .. firstShortestContainsMatches, + .. lastShortestContainsMatches, + .. firstLongestContainsMatches, + .. lastLongestContainsMatches, + nonHeadwordMatch, + ]; + + var ids = expected.Select(e => e.Id).ToHashSet(); + + foreach (var entry in Faker.Faker.Random.Shuffle(expected)) + await Api.CreateEntry(entry); + + var results = (await Api.SearchEntries(searchTerm, new(new(SortField.SearchRelevance))).ToArrayAsync()) + .Where(e => ids.Contains(e.Id)) + .ToList(); + + results.Should().BeEquivalentTo(expected, + options => options); + results.Should().BeEquivalentTo(expected, + options => options.WithStrictOrdering()); + } + + [Theory] + [InlineData("aaaa")] // FTS rank + [InlineData("a")] // non-FTS rank + public async Task SecondaryOrder_Relevance_CitationForm(string searchTerm) + { + MorphType[] morphTypes = [ + new() { Id = Guid.NewGuid(), Kind = MorphTypeKind.Root, Name = { ["en"] = "Root" }, SecondaryOrder = 1 }, + // new() { Id = Guid.NewGuid(), Kind = MorphTypeKind.Stem, Name = { ["en"] = "Stem" }, SecondaryOrder = 1 }, + new() { Id = Guid.NewGuid(), Kind = MorphTypeKind.BoundRoot, Name = { ["en"] = "BoundRoot" }, SecondaryOrder = 2 }, + // new() { Id = Guid.NewGuid(), Kind = MorphTypeKind.BoundStem, Name = { ["en"] = "BoundStem" }, SecondaryOrder = 2 }, + ]; + + foreach (var morphType in morphTypes) + await Api.CreateMorphType(morphType); + + static Entry[] CreateSortedEntrySet(string headword) + { + return [ + // Root/Stem - SecondaryOrder: 1 + new() { Id = Guid.NewGuid(), CitationForm = { ["en"] = headword }, LexemeForm = { ["en"] = "❌" }, MorphType = MorphTypeKind.Root/*, HomographNumber = 1*/ }, + // new() { Id = Guid.NewGuid(), CitationForm = { ["en"] = headword }, LexemeForm = { ["en"] = "❌" }, MorphType = MorphTypeKind.Stem, HomographNumber = 2 }, + // BoundRoot/BoundStem - SecondaryOrder: 2 + new() { Id = Guid.NewGuid(), CitationForm = { ["en"] = headword }, LexemeForm = { ["en"] = "❌" }, MorphType = MorphTypeKind.BoundRoot/*, HomographNumber = 1*/ }, + // new() { Id = Guid.NewGuid(), CitationForm = { ["en"] = headword }, LexemeForm = { ["en"] = "❌" }, MorphType = MorphTypeKind.BoundStem, HomographNumber = 2 }, + ]; + } + + var exactMatches = CreateSortedEntrySet("aaaa"); + var firstShortestStartsWithMatches = CreateSortedEntrySet("aaaab"); + var lastShortestStartsWithMatches = CreateSortedEntrySet("aaaac"); + var firstLongestStartsWithMatches = CreateSortedEntrySet("aaaabb"); + var lastLongestStartsWithMatches = CreateSortedEntrySet("aaaacc"); + var firstShortestContainsMatches = CreateSortedEntrySet("baaaa"); + var lastShortestContainsMatches = CreateSortedEntrySet("caaaa"); + var firstLongestContainsMatches = CreateSortedEntrySet("bbaaaa"); + var lastLongestContainsMatches = CreateSortedEntrySet("ccaaaa"); + + var entryId = Guid.NewGuid(); + Entry nonHeadwordMatch = new() { Id = entryId, Senses = [new() { EntryId = entryId, Gloss = { ["en"] = "aaaa" } }] }; + + Entry[] expected = [ + .. exactMatches, + .. firstShortestStartsWithMatches, + .. lastShortestStartsWithMatches, + .. firstLongestStartsWithMatches, + .. lastLongestStartsWithMatches, + .. firstShortestContainsMatches, + .. lastShortestContainsMatches, + .. firstLongestContainsMatches, + .. lastLongestContainsMatches, + nonHeadwordMatch, + ]; + + var ids = expected.Select(e => e.Id).ToHashSet(); + + foreach (var entry in Faker.Faker.Random.Shuffle(expected)) + await Api.CreateEntry(entry); + + var results = (await Api.SearchEntries(searchTerm, new(new(SortField.SearchRelevance))).ToArrayAsync()) + .Where(e => ids.Contains(e.Id)) + .ToList(); + + results.Should().BeEquivalentTo(expected, + options => options); + results.Should().BeEquivalentTo(expected, + options => options.WithStrictOrdering()); + } + + [Theory] + [InlineData("baaa")] // FTS rank + [InlineData("b")] // non-FTS rank + public async Task SecondaryOrder_Headword_LexemeForm(string searchTerm) + { + MorphType[] morphTypes = [ + new() { Id = Guid.NewGuid(), Kind = MorphTypeKind.Root, Name = { ["en"] = "Root" }, SecondaryOrder = 1 }, + // new() { Id = Guid.NewGuid(), Kind = MorphTypeKind.Stem, Name = { ["en"] = "Stem" }, SecondaryOrder = 1 }, + new() { Id = Guid.NewGuid(), Kind = MorphTypeKind.BoundRoot, Name = { ["en"] = "BoundRoot" }, SecondaryOrder = 2 }, + // new() { Id = Guid.NewGuid(), Kind = MorphTypeKind.BoundStem, Name = { ["en"] = "BoundStem" }, SecondaryOrder = 2 }, + ]; + + foreach (var morphType in morphTypes) + await Api.CreateMorphType(morphType); + + Entry[] expected = [ + // Root/Stem - SecondaryOrder: 1 + new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "abaaa" }, MorphType = MorphTypeKind.Root/*, HomographNumber = 1*/ }, + // new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "abaaa" }, MorphType = MorphTypeKind.Stem, HomographNumber = 2 }, + // BoundRoot/BoundStem - SecondaryOrder: 2 + new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "abaaa" }, MorphType = MorphTypeKind.BoundRoot/*, HomographNumber = 1*/ }, + // new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "abaaa" }, MorphType = MorphTypeKind.BoundStem, HomographNumber = 2 }, + // Root/Stem - SecondaryOrder: 1 + new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "baaa" }, MorphType = MorphTypeKind.Root/*, HomographNumber = 1*/ }, + // new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "baaa" }, MorphType = MorphTypeKind.Stem, HomographNumber = 2 }, + // BoundRoot/BoundStem - SecondaryOrder: 2 + new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "baaa" }, MorphType = MorphTypeKind.BoundRoot/*, HomographNumber = 1*/ }, + // new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "baaa" }, MorphType = MorphTypeKind.BoundStem, HomographNumber = 2 }, + ]; + + var ids = expected.Select(e => e.Id).ToHashSet(); + + foreach (var entry in Faker.Faker.Random.Shuffle(expected)) + await Api.CreateEntry(entry); + + var results = (await Api.SearchEntries(searchTerm, new(new(SortField.Headword))).ToArrayAsync()) + .Where(e => ids.Contains(e.Id)) + .ToList(); + + results.Should().BeEquivalentTo(expected, + options => options); + results.Should().BeEquivalentTo(expected, + options => options.WithStrictOrdering()); + } + + [Theory] + [InlineData("baaa")] // FTS rank + [InlineData("b")] // non-FTS rank + public async Task SecondaryOrder_Headword_CitationForm(string searchTerm) + { + MorphType[] morphTypes = [ + new() { Id = Guid.NewGuid(), Kind = MorphTypeKind.Root, Name = { ["en"] = "Root" }, SecondaryOrder = 1 }, + // new() { Id = Guid.NewGuid(), Kind = MorphTypeKind.Stem, Name = { ["en"] = "Stem" }, SecondaryOrder = 1 }, + new() { Id = Guid.NewGuid(), Kind = MorphTypeKind.BoundRoot, Name = { ["en"] = "BoundRoot" }, SecondaryOrder = 2 }, + // new() { Id = Guid.NewGuid(), Kind = MorphTypeKind.BoundStem, Name = { ["en"] = "BoundStem" }, SecondaryOrder = 2 }, + ]; + + foreach (var morphType in morphTypes) + await Api.CreateMorphType(morphType); + + Entry[] expected = [ + // Root/Stem - SecondaryOrder: 1 + new() { Id = Guid.NewGuid(), CitationForm = { ["en"] = "abaaa" }, LexemeForm = { ["en"] = "❌" }, MorphType = MorphTypeKind.Root/*, HomographNumber = 1*/ }, + // new() { Id = Guid.NewGuid(), CitationForm = { ["en"] = "abaaa" }, LexemeForm = { ["en"] = "❌" }, MorphType = MorphTypeKind.Stem, HomographNumber = 2 }, + // BoundRoot/BoundStem - SecondaryOrder: 2 + new() { Id = Guid.NewGuid(), CitationForm = { ["en"] = "abaaa" }, LexemeForm = { ["en"] = "❌" }, MorphType = MorphTypeKind.BoundRoot/*, HomographNumber = 1*/ }, + // new() { Id = Guid.NewGuid(), CitationForm = { ["en"] = "abaaa" }, LexemeForm = { ["en"] = "❌" }, MorphType = MorphTypeKind.BoundStem, HomographNumber = 2 }, + // Root/Stem - SecondaryOrder: 1 + new() { Id = Guid.NewGuid(), CitationForm = { ["en"] = "baaa" }, LexemeForm = { ["en"] = "❌" }, MorphType = MorphTypeKind.Root/*, HomographNumber = 1*/ }, + // new() { Id = Guid.NewGuid(), CitationForm = { ["en"] = "baaa" }, LexemeForm = { ["en"] = "❌" }, MorphType = MorphTypeKind.Stem, HomographNumber = 2 }, + // BoundRoot/BoundStem - SecondaryOrder: 2 + new() { Id = Guid.NewGuid(), CitationForm = { ["en"] = "baaa" }, LexemeForm = { ["en"] = "❌" }, MorphType = MorphTypeKind.BoundRoot/*, HomographNumber = 1*/ }, + // new() { Id = Guid.NewGuid(), CitationForm = { ["en"] = "baaa" }, LexemeForm = { ["en"] = "❌" }, MorphType = MorphTypeKind.BoundStem, HomographNumber = 2 }, + ]; + + var ids = expected.Select(e => e.Id).ToHashSet(); + + foreach (var entry in Faker.Faker.Random.Shuffle(expected)) + await Api.CreateEntry(entry); + + var results = (await Api.SearchEntries(searchTerm, new(new(SortField.Headword))).ToArrayAsync()) + .Where(e => ids.Contains(e.Id)) + .ToList(); + + results.Should().BeEquivalentTo(expected, + options => options); + results.Should().BeEquivalentTo(expected, + options => options.WithStrictOrdering()); + } } diff --git a/backend/FwLite/MiniLcm/Models/Entry.cs b/backend/FwLite/MiniLcm/Models/Entry.cs index 575200aac8..550080e628 100644 --- a/backend/FwLite/MiniLcm/Models/Entry.cs +++ b/backend/FwLite/MiniLcm/Models/Entry.cs @@ -35,10 +35,11 @@ public string Headword() { //order by code to ensure the headword is stable //todo choose ws by preference based on ws order/default + //todo this does not apply morph tokens — see #1284 //https://github.com/sillsdev/languageforge-lexbox/issues/1284 - var word = CitationForm.Values.OrderBy(kvp => kvp.Key.Code).FirstOrDefault().Value; - if (string.IsNullOrEmpty(word)) word = LexemeForm.Values.OrderBy(kvp => kvp.Key.Code).FirstOrDefault().Value; - return word?.Trim() ?? UnknownHeadword; + var word = CitationForm.Values.OrderBy(kvp => kvp.Key.Code).FirstOrDefault().Value?.Trim(); + if (string.IsNullOrEmpty(word)) word = LexemeForm.Values.OrderBy(kvp => kvp.Key.Code).FirstOrDefault().Value?.Trim(); + return string.IsNullOrEmpty(word) ? UnknownHeadword : word; } public Entry Copy() diff --git a/backend/FwLite/MiniLcm/Models/MorphType.cs b/backend/FwLite/MiniLcm/Models/MorphType.cs index d241a35ff5..281e62c96e 100644 --- a/backend/FwLite/MiniLcm/Models/MorphType.cs +++ b/backend/FwLite/MiniLcm/Models/MorphType.cs @@ -30,7 +30,7 @@ public enum MorphTypeKind public class MorphType : IObjectWithId { public virtual Guid Id { get; set; } - public virtual MorphTypeKind Kind { get; set; } + public virtual required MorphTypeKind Kind { get; set; } public virtual MultiString Name { get; set; } = []; public virtual MultiString Abbreviation { get; set; } = []; public virtual RichMultiString Description { get; set; } = []; diff --git a/frontend/viewer/src/project/demo/demo-entry-data.ts b/frontend/viewer/src/project/demo/demo-entry-data.ts index 33b8e32100..1b4013567a 100644 --- a/frontend/viewer/src/project/demo/demo-entry-data.ts +++ b/frontend/viewer/src/project/demo/demo-entry-data.ts @@ -101,6 +101,7 @@ export const writingSystems: IWritingSystems = { export const _entries: IEntry[] = [ { 'id': '34779c06-5a73-4fe9-8325-b110b23f9293', + 'headword': {}, 'lexemeForm': { 'seh': 'nyumba', 'seh-fonipa-x-etic': 'ɲumba', 'ny': 'nyumba' }, 'citationForm': { 'seh': '!nyumba', 'seh-fonipa-x-etic': 'ɲumba', 'ny': 'nyumba' }, 'literalMeaning': {}, @@ -167,6 +168,7 @@ export const _entries: IEntry[] = [ }, { 'id': '34779c06-5a73-4fe9-8325-b110b23f9294', + 'headword': {}, 'lexemeForm': { 'seh': 'dance', 'seh-fonipa-x-etic': 'dæns', 'ny': 'vina' }, 'citationForm': {}, 'literalMeaning': {}, diff --git a/frontend/viewer/src/stories/editor/entity-primitives/entry-editor-primitive.stories.svelte b/frontend/viewer/src/stories/editor/entity-primitives/entry-editor-primitive.stories.svelte index e2ce2f2113..4f2b4e4f37 100644 --- a/frontend/viewer/src/stories/editor/entity-primitives/entry-editor-primitive.stories.svelte +++ b/frontend/viewer/src/stories/editor/entity-primitives/entry-editor-primitive.stories.svelte @@ -9,6 +9,9 @@ let entry: IEntry = $state({ id: '36b8f84d-df4e-4d49-b662-bcde71a8764f', + headword: { + 'seh': 'Citation form', + }, lexemeForm: { 'seh': 'Lexeme form', },