Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
using FwDataMiniLcmBridge.Api;
using FwDataMiniLcmBridge.LcmUtils;
using FwDataMiniLcmBridge.Tests.Fixtures;
using MiniLcm.Models;

namespace FwDataMiniLcmBridge.Tests.MiniLcmTests;

Expand All @@ -9,4 +12,51 @@ protected override Task<IMiniLcmApi> NewApi()
{
return Task.FromResult<IMiniLcmApi>(fixture.NewProjectApi("sorting-test", "en", "en"));
}

/// <summary>
/// Entries that share a headword are expected to come back ordered by their morph type's
/// SecondaryOrder, and an entry whose morph type reference has been cleared is expected
/// to sort as if it were a Stem.
/// </summary>
[Theory]
[InlineData("aaaa", SortField.Headword)] // FTS rank
[InlineData("a", SortField.Headword)] // non-FTS rank
[InlineData("aaaa", SortField.SearchRelevance)] // FTS rank
[InlineData("a", SortField.SearchRelevance)] // non-FTS rank
public async Task SecondaryOrder_DefaultsToStem(string query, SortField sortField)
{
    // Arrange: three morph types with distinct secondary orders.
    MorphType[] morphTypes = [
        new() { Id = Guid.NewGuid(), Kind = MorphTypeKind.Stem, Name = { ["en"] = "Stem" }, SecondaryOrder = 1 },
        new() { Id = Guid.NewGuid(), Kind = MorphTypeKind.BoundStem, Name = { ["en"] = "BoundStem" }, SecondaryOrder = 2 },
        new() { Id = Guid.NewGuid(), Kind = MorphTypeKind.Suffix, Name = { ["en"] = "Suffix" }, Postfix = "-", SecondaryOrder = 6 },
    ];
    foreach (var morphType in morphTypes)
    {
        await Api.CreateMorphType(morphType);
    }

    // Listed in the order the search is expected to return them.
    var otherMorphTypeEntryId = Guid.NewGuid();
    Entry[] expected = [
        new() { Id = otherMorphTypeEntryId, LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.Unknown }, // SecondaryOrder defaults to Stem = 1
        new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.BoundStem }, // SecondaryOrder = 2
        new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.Suffix }, // SecondaryOrder = 6
    ];
    var expectedIds = expected.Select(e => e.Id).ToHashSet();

    // Create the entries in a shuffled order so the result order cannot come from insertion order.
    foreach (var entry in Faker.Faker.Random.Shuffle(expected))
    {
        await Api.CreateEntry(entry);
    }

    var fwDataApi = (BaseApi as FwDataMiniLcmApi)!;
    await fwDataApi.Cache.DoUsingNewOrCurrentUOW(
        "Clear morph type",
        "Revert morph type",
        () =>
        {
            // the fwdata api doesn't allow creating entries with MorphType.Other or Unknown, so we force it
            var otherMorphTypeEntry = fwDataApi.EntriesRepository.GetObject(otherMorphTypeEntryId);
            otherMorphTypeEntry.LexemeFormOA.MorphTypeRA = null;
            return ValueTask.CompletedTask;
        });

    // Act: search, then keep only the entries this test created.
    var searchHits = await Api.SearchEntries(query, new(new(sortField))).ToArrayAsync();
    var results = searchHits
        .Where(e => expectedIds.Contains(e.Id))
        .ToList();

    // Assert: same entries in exactly the expected order; Headword is excluded from the comparison.
    results.Should().BeEquivalentTo(
        expected,
        options => options.Excluding(e => e.Headword).WithStrictOrdering());
}
}
42 changes: 38 additions & 4 deletions backend/FwLite/FwDataMiniLcmBridge/Api/FwDataMiniLcmApi.cs
Original file line number Diff line number Diff line change
Expand Up @@ -652,14 +652,15 @@ private Entry FromLexEntry(ILexEntry entry)
{
try
{
return new Entry
var result = new Entry
{
Id = entry.Guid,
Note = FromLcmMultiString(entry.Comment),
LexemeForm = FromLcmMultiString(entry.LexemeFormOA?.Form),
CitationForm = FromLcmMultiString(entry.CitationForm),
LiteralMeaning = FromLcmMultiString(entry.LiteralMeaning),
MorphType = LcmHelpers.FromLcmMorphType(entry.PrimaryMorphType), // TODO: Decide what to do about entries with *mixed* morph types
HomographNumber = entry.HomographNumber,
Senses = [.. entry.AllSenses.Select(FromLexSense)],
ComplexFormTypes = ToComplexFormTypes(entry),
Components = [.. ToComplexFormComponents(entry)],
Expand All @@ -670,6 +671,8 @@ private Entry FromLexEntry(ILexEntry entry)
// ILexEntry.PublishIn is a virtual property that inverts DoNotPublishInRC against all publications
PublishIn = entry.PublishIn.Select(FromLcmPossibility).ToList(),
};
result.Headword = ComputeHeadword(result, entry.PrimaryMorphType);
return result;
}
catch (Exception e)
{
Expand All @@ -678,6 +681,35 @@ private Entry FromLexEntry(ILexEntry entry)
}
}

/// <summary>
/// Builds the per-writing-system headword for an entry: the trimmed citation form when one
/// exists, otherwise the trimmed lexeme form wrapped in the morph type's prefix/postfix tokens.
/// </summary>
/// <param name="result">The entry whose <c>CitationForm</c> and <c>LexemeForm</c> are read.</param>
/// <param name="lcmMorphType">Morph type supplying the leading/trailing tokens; <c>null</c> means no tokens.</param>
/// <returns>A new <see cref="MultiString"/> with one value per writing system that yielded a non-empty headword.</returns>
private static MultiString ComputeHeadword(Entry result, IMoMorphType? lcmMorphType)
{
    var headword = new MultiString();
    var leading = lcmMorphType?.Prefix ?? "";
    var trailing = lcmMorphType?.Postfix ?? "";

    // Iterate all WS keys that have data, not just "current" vernacular WSs,
    // so we don't lose headwords for non-current or future writing systems.
    var wsIds = result.CitationForm.Values.Keys
        .Union(result.LexemeForm.Values.Keys);

    foreach (var wsId in wsIds)
    {
        // Trim BEFORE testing for emptiness: a whitespace-only citation form must not
        // store an empty headword and must not block the lexeme-form fallback.
        var citation = result.CitationForm[wsId]?.Trim();
        if (!string.IsNullOrEmpty(citation))
        {
            headword[wsId] = citation;
            continue;
        }

        // Same rule for the lexeme form: a whitespace-only value produces no headword
        // (rather than a headword made only of morph tokens), and the tokens are attached
        // to the trimmed text so no stray whitespace ends up between token and form.
        var lexeme = result.LexemeForm[wsId]?.Trim();
        if (!string.IsNullOrEmpty(lexeme))
        {
            headword[wsId] = (leading + lexeme + trailing).Trim();
        }
    }
    return headword;
}

private List<ComplexFormType> ToComplexFormTypes(ILexEntry entry)
{
return entry.ComplexFormEntryRefs
Expand Down Expand Up @@ -939,12 +971,13 @@ private IEnumerable<ILexEntry> GetFilteredAndSortedEntries(Func<ILexEntry, bool>
/// <summary>
/// Sorts <paramref name="entries"/> according to <paramref name="order"/>.
/// </summary>
/// <param name="order">Sort options; its <c>Field</c> selects the strategy and its <c>WritingSystem</c> selects the sort writing system.</param>
/// <param name="entries">The entries to sort.</param>
/// <param name="query">The search query, forwarded to the relevance ordering only.</param>
/// <returns>The entries in relevance order when <c>order.Field</c> is <c>SortField.SearchRelevance</c>, otherwise in headword order.</returns>
private IEnumerable<ILexEntry> ApplySorting(SortOptions order, IEnumerable<ILexEntry> entries, string? query)
{
    var sortWs = GetWritingSystemHandle(order.WritingSystem, WritingSystemType.Vernacular);
    // SecondaryOrder of the Stem morph type; both orderings use it as the fallback
    // for entries that have no primary morph type.
    var stemSecondaryOrder = MorphTypeRepository.GetObject(MoMorphTypeTags.kguidMorphStem).SecondaryOrder;
    if (order.Field == SortField.SearchRelevance)
    {
        return entries.ApplyRoughBestMatchOrder(order, sortWs, stemSecondaryOrder, query);
    }

    return entries.ApplyHeadwordOrder(order, sortWs, stemSecondaryOrder);
}

public IAsyncEnumerable<Entry> SearchEntries(string query, QueryOptions? options = null)
Expand All @@ -956,7 +989,7 @@ public IAsyncEnumerable<Entry> SearchEntries(string query, QueryOptions? options
/// <summary>
/// Builds the predicate used to filter entries by a text query.
/// </summary>
/// <param name="query">The text to search for.</param>
/// <returns>
/// <c>null</c> when <paramref name="query"/> is null or empty; otherwise a predicate that
/// accepts an entry when its headword, its lexeme form, or the gloss of any of its senses matches.
/// </returns>
private Func<ILexEntry, bool>? EntrySearchPredicate(string? query = null)
{
    if (string.IsNullOrEmpty(query)) return null;
    return entry => entry.SearchHeadWord(query) || // CitationForm.SearchValue would be redundant
                    entry.LexemeFormOA?.Form.SearchValue(query) is true ||
                    entry.AllSenses.Any(s => s.Gloss.SearchValue(query));
}
Expand Down Expand Up @@ -1001,6 +1034,7 @@ public async Task<Entry> CreateEntry(Entry entry, CreateEntryOptions? options =
UpdateLcmMultiString(lexEntry.CitationForm, entry.CitationForm);
UpdateLcmMultiString(lexEntry.LiteralMeaning, entry.LiteralMeaning);
UpdateLcmMultiString(lexEntry.Comment, entry.Note);
lexEntry.HomographNumber = entry.HomographNumber;

foreach (var sense in entry.Senses)
{
Expand Down
22 changes: 20 additions & 2 deletions backend/FwLite/FwDataMiniLcmBridge/Api/LcmHelpers.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ namespace FwDataMiniLcmBridge.Api;

internal static class LcmHelpers
{
internal static string? LexEntryHeadword(this ILexEntry entry, int? ws = null)
internal static string? LexEntryHeadword(this ILexEntry entry, int? ws = null, bool applyMorphTokens = true)
{
var citationFormTs =
ws.HasValue ? entry.CitationForm.get_String(ws.Value)
Expand All @@ -27,7 +27,12 @@ internal static class LcmHelpers
: null;
var lexemeForm = lexemeFormTs?.Text?.Trim(WhitespaceChars);

return lexemeForm;
if (string.IsNullOrEmpty(lexemeForm) || !applyMorphTokens) return lexemeForm;

var morphType = entry.LexemeFormOA?.MorphTypeRA;
var leading = morphType?.Prefix ?? "";
var trailing = morphType?.Postfix ?? "";
return (leading + lexemeForm + trailing).Trim(WhitespaceChars);
}

internal static string LexEntryHeadwordOrUnknown(this ILexEntry entry, int? ws = null)
Expand All @@ -36,6 +41,19 @@ internal static string LexEntryHeadwordOrUnknown(this ILexEntry entry, int? ws =
return string.IsNullOrEmpty(headword) ? Entry.UnknownHeadword : headword;
}

/// <summary>
/// Checks whether the entry's headword matches <paramref name="value"/> in any of the
/// vernacular writing systems exposed by the entry's cache.
/// </summary>
/// <param name="entry">The entry whose headwords are inspected.</param>
/// <param name="value">The text to look for.</param>
/// <returns><c>true</c> as soon as one headword matches; <c>false</c> when none do.</returns>
internal static bool SearchHeadWord(this ILexEntry entry, string value)
{
    var vernacularWritingSystems = entry.Cache.ServiceLocator.WritingSystems.VernacularWritingSystems;
    foreach (var writingSystem in vernacularWritingSystems)
    {
        // A missing headword and a headword without text are both skipped.
        var headwordText = entry.HeadWordForWs(writingSystem.Handle)?.Text;
        if (!string.IsNullOrEmpty(headwordText) && headwordText.ContainsDiacriticMatch(value))
        {
            return true;
        }
    }

    return false;
}

internal static bool SearchValue(this ITsMultiString multiString, string value)
{
for (var i = 0; i < multiString.StringCount; i++)
Expand Down
28 changes: 26 additions & 2 deletions backend/FwLite/FwDataMiniLcmBridge/Api/Sorting.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,43 @@ namespace FwDataMiniLcmBridge.Api;

internal static class Sorting
{
/// <summary>
/// Orders entries by headword (without morph tokens), then by the primary morph type's
/// SecondaryOrder, then by homograph number, and finally by Guid.
/// </summary>
/// <param name="entries">The entries to order.</param>
/// <param name="order">Sort options; <c>Ascending</c> selects the direction applied to every key.</param>
/// <param name="sortWsHandle">Handle of the writing system whose headword is used as the primary key.</param>
/// <param name="stemSecondaryOrder">SecondaryOrder used for entries that have no primary morph type.</param>
/// <returns>The ordered sequence.</returns>
public static IEnumerable<ILexEntry> ApplyHeadwordOrder(this IEnumerable<ILexEntry> entries, SortOptions order, int sortWsHandle, int stemSecondaryOrder)
{
    if (!order.Ascending)
    {
        return entries
            .OrderByDescending(HeadwordKey)
            .ThenByDescending(MorphTypeKey)
            .ThenByDescending(lexEntry => lexEntry.HomographNumber)
            .ThenByDescending(lexEntry => lexEntry.Id.Guid);
    }

    return entries
        .OrderBy(HeadwordKey)
        .ThenBy(MorphTypeKey)
        .ThenBy(lexEntry => lexEntry.HomographNumber)
        .ThenBy(lexEntry => lexEntry.Id.Guid);

    // Primary key: the headword with no prefix/postfix tokens applied.
    string? HeadwordKey(ILexEntry lexEntry) =>
        lexEntry.LexEntryHeadword(sortWsHandle, applyMorphTokens: false);

    // Secondary key: entries without a primary morph type use the supplied fallback.
    int MorphTypeKey(ILexEntry lexEntry) =>
        lexEntry.PrimaryMorphType?.SecondaryOrder ?? stemSecondaryOrder;
}

/// <summary>
/// Rough emulation of FTS search relevance. Headword matches come first, preferring
/// prefix matches (e.g. when searching "tan" then "tanan" is before "matan"), then shorter, then alphabetical.
/// See also: EntrySearchService.FilterAndRank for the FTS-based equivalent in LcmCrdt.
/// </summary>
public static IEnumerable<ILexEntry> ApplyRoughBestMatchOrder(this IEnumerable<ILexEntry> entries, SortOptions order, int sortWsHandle, string? query = null)
public static IEnumerable<ILexEntry> ApplyRoughBestMatchOrder(this IEnumerable<ILexEntry> entries, SortOptions order, int sortWsHandle, int stemSecondaryOrder, string? query = null)
{
var projected = entries.Select(e => (Entry: e, Headword: e.LexEntryHeadword(sortWsHandle)));
var projected = entries.Select(e => (Entry: e, Headword: e.LexEntryHeadword(sortWsHandle, applyMorphTokens: false)));
if (order.Ascending)
{
return projected
.OrderByDescending(x => !string.IsNullOrEmpty(query) && (x.Headword?.ContainsDiacriticMatch(query!) ?? false))
.ThenByDescending(x => !string.IsNullOrEmpty(query) && (x.Headword?.StartsWithDiacriticMatch(query!) ?? false))
.ThenBy(x => x.Headword?.Length ?? 0)
.ThenBy(x => x.Headword)
.ThenBy(x => x.Entry.PrimaryMorphType?.SecondaryOrder ?? stemSecondaryOrder)
.ThenBy(x => x.Entry.HomographNumber)
.ThenBy(x => x.Entry.Id.Guid)
.Select(x => x.Entry);
}
Expand All @@ -31,6 +53,8 @@ public static IEnumerable<ILexEntry> ApplyRoughBestMatchOrder(this IEnumerable<I
.ThenBy(x => !string.IsNullOrEmpty(query) && (x.Headword?.StartsWithDiacriticMatch(query!) ?? false))
.ThenByDescending(x => x.Headword?.Length ?? 0)
.ThenByDescending(x => x.Headword)
.ThenByDescending(x => x.Entry.PrimaryMorphType?.SecondaryOrder ?? stemSecondaryOrder)
.ThenByDescending(x => x.Entry.HomographNumber)
.ThenByDescending(x => x.Entry.Id.Guid)
.Select(x => x.Entry);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,12 @@ public override RichMultiString Note
get => new UpdateRichMultiStringProxy(_lcmEntry.Comment, _lexboxLcmApi);
set => throw new NotImplementedException();
}

/// <summary>
/// Homograph number of the entry; reads and writes go directly to the wrapped LCM entry.
/// </summary>
public override int HomographNumber
{
    get
    {
        return _lcmEntry.HomographNumber;
    }
    set
    {
        _lcmEntry.HomographNumber = value;
    }
}
}

public class UpdateMultiStringProxy(ITsMultiString multiString, FwDataMiniLcmApi lexboxLcmApi) : MultiString
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
using System.Diagnostics.CodeAnalysis;
using MiniLcm.Models;
using SIL.LCModel;

Expand All @@ -8,11 +9,13 @@ public class UpdateMorphTypeProxy : MorphType
private readonly IMoMorphType _lcmMorphType;
private readonly FwDataMiniLcmApi _lexboxLcmApi;

/// <summary>
/// Creates a proxy around an LCM morph type.
/// </summary>
/// <param name="lcmMorphType">The LCM morph type this proxy wraps; its Guid becomes <c>Id</c>.</param>
/// <param name="lexboxLcmApi">The API instance stored alongside the wrapped morph type.</param>
// SetsRequiredMembers tells the compiler that this constructor initializes the type's
// required members, so callers do not need an object initializer.
[SetsRequiredMembers]
public UpdateMorphTypeProxy(IMoMorphType lcmMorphType, FwDataMiniLcmApi lexboxLcmApi)
{
_lcmMorphType = lcmMorphType;
Id = lcmMorphType.Guid;
_lexboxLcmApi = lexboxLcmApi;
// Kind is derived once from the LCM morph type at construction time.
Kind = LcmHelpers.FromLcmMorphType(lcmMorphType);
}

public override MultiString Name
Expand Down
Loading
Loading