Skip to content

Commit 13a4e0e

Browse files
committed
Uncomment HeadwordSearchValue expression; add SQL translation test
Uncomment HeadwordSearchValue so the compiler validates types and the test can validate SQL translation when run locally. Nothing calls HeadwordSearchValue in production yet — it's ready for when MorphTypeData becomes a CRDT entity.

HeadwordSearchValueTests covers:
- Morph token string concatenation inside the json_each subquery
- CitationForm priority over LexemeForm
- Non-primary WS matching (the gap identified in review)

https://claude.ai/code/session_01GFNCNDE5wHE2hGC7pQQp2f
1 parent 0e4af73 commit 13a4e0e

2 files changed

Lines changed: 125 additions & 33 deletions

File tree

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
using LcmCrdt.Data;
2+
using LinqToDB;
3+
using LinqToDB.EntityFrameworkCore;
4+
using Microsoft.EntityFrameworkCore;
5+
6+
namespace LcmCrdt.Tests.FullTextSearch;
7+
8+
/// <summary>
/// Runs <c>HeadwordSearchValue</c> inside LinqToDB queries against the fixture's
/// database so that its expression form is exercised rather than the in-memory method.
/// </summary>
public class HeadwordSearchValueTests : IAsyncLifetime
{
    private readonly MiniLcmApiFixture _fixture = new();
    private LcmCrdtDbContext _context = null!;

    /// <summary>Starts the fixture and opens the context used by every test.</summary>
    public async Task InitializeAsync()
    {
        await _fixture.InitializeAsync();
        var contextFactory = _fixture.GetService<IDbContextFactory<LcmCrdtDbContext>>();
        _context = await contextFactory.CreateDbContextAsync();
    }

    /// <summary>Releases the context first, then the fixture that produced it.</summary>
    public async Task DisposeAsync()
    {
        await _context.DisposeAsync();
        await _fixture.DisposeAsync();
    }

    /// <summary>Adds <paramref name="entry"/> to the context and saves it.</summary>
    private async Task SaveEntryAsync(Entry entry)
    {
        _context.Set<Entry>().Add(entry);
        await _context.SaveChangesAsync();
    }

    /// <summary>
    /// Validates that HeadwordSearchValue translates to SQL correctly,
    /// specifically that string concatenation with json_each values works.
    /// </summary>
    [Theory]
    [InlineData("-", "", "-ing", true)]   // suffix: leading="-", query="-ing" matches lexeme "ing"
    [InlineData("", "-", "ing-", true)]   // prefix: trailing="-", query="ing-" matches lexeme "ing"
    [InlineData("-", "", "ing", true)]    // query without token still matches raw lexeme
    [InlineData("-", "", "-xyz", false)]  // "-xyz" should not match lexeme "ing"
    [InlineData("", "", "ing", true)]     // no tokens, matches raw lexeme
    [InlineData("-", "", "-fra", true)]   // matches French lexeme "fra" with token
    public async Task HeadwordSearchValue_WithTokens_MatchesCorrectly(
        string leading, string trailing, string query, bool shouldMatch)
    {
        var entryId = Guid.NewGuid();
        await SaveEntryAsync(new Entry
        {
            Id = entryId,
            LexemeForm = { ["en"] = "ing", ["fr"] = "fra" },
            MorphType = MorphType.Suffix
        });

        // Going through GetTable keeps the predicate on the SQL translation path.
        var matches = await _context.GetTable<Entry>()
            .Where(entry => entry.Id == entryId && entry.HeadwordSearchValue(leading, trailing, query))
            .ToListAsyncLinqToDB();

        if (shouldMatch)
        {
            matches.Should().ContainSingle(entry => entry.Id == entryId);
        }
        else
        {
            matches.Should().BeEmpty();
        }
    }

    /// <summary>
    /// A query equal to the citation form must match even though morph tokens
    /// are supplied and the query itself contains none of them.
    /// </summary>
    [Fact]
    public async Task HeadwordSearchValue_CitationFormMatchesWithoutTokens()
    {
        var entryId = Guid.NewGuid();
        await SaveEntryAsync(new Entry
        {
            Id = entryId,
            CitationForm = { ["en"] = "running" },
            LexemeForm = { ["en"] = "run" },
            MorphType = MorphType.Stem
        });

        var matches = await _context.GetTable<Entry>()
            .Where(entry => entry.Id == entryId && entry.HeadwordSearchValue("-", "", "running"))
            .ToListAsyncLinqToDB();

        matches.Should().ContainSingle(entry => entry.Id == entryId);
    }

    /// <summary>
    /// Scenario from code review: the main WS is "es", but the match has to
    /// come from the "en" headword combined with morph tokens.
    /// </summary>
    [Fact]
    public async Task HeadwordSearchValue_MatchesNonPrimaryWs()
    {
        var entryId = Guid.NewGuid();
        await SaveEntryAsync(new Entry
        {
            Id = entryId,
            LexemeForm = { ["en"] = "ing", ["es"] = "abc" },
            MorphType = MorphType.Suffix
        });

        // "-ing" should match via the English headword even if the primary WS is Spanish.
        var matches = await _context.GetTable<Entry>()
            .Where(entry => entry.Id == entryId && entry.HeadwordSearchValue("-", "", "-ing"))
            .ToListAsyncLinqToDB();

        matches.Should().ContainSingle(entry => entry.Id == entryId);
    }
}

backend/FwLite/LcmCrdt/Data/EntryQueryHelpers.cs

Lines changed: 19 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -37,46 +37,32 @@ public static string HeadwordWithTokens(this Entry e, WritingSystemId ws, string
3737
: ((leading ?? "") + Json.Value(e.LexemeForm, ms => ms[ws]) + (trailing ?? "")).Trim()
3838
: Json.Value(e.CitationForm, ms => ms[ws])!.Trim();
3939

40-
// === Morph-token-aware headword expressions (ready for when MorphTypeData is a CRDT entity) ===
40+
// === Morph-token-aware headword expressions ===
4141
//
42-
// Once MorphTypeData is in the DB, use these in place of the token-free versions above.
43-
//
44-
// Usage in queries (e.g. EntrySearchService.FilterInternal, Filtering.cs):
45-
//
46-
// var morphTypes = dbContext.GetTable<MorphTypeData>();
47-
//
48-
// from entry in entries
49-
// from morphData in morphTypes.LeftJoin(m => (int)m.MorphType == (int)entry.MorphType)
50-
// let headword = entry.HeadwordWithTokens(wsId, morphData.LeadingToken, morphData.TrailingToken)
51-
// ...
52-
//
53-
// For searching headwords (with tokens) across ALL writing systems:
54-
//
55-
// [ExpressionMethod(nameof(HeadwordSearchValueExpression))]
56-
// public static bool HeadwordSearchValue(Entry e, string? leading, string? trailing, string query)
57-
// {
58-
// return e.CitationForm.SearchValue(query)
59-
// || e.LexemeForm.Values.Any(kvp =>
60-
// SqlHelpers.ContainsIgnoreCaseAccents((leading ?? "") + kvp.Value + (trailing ?? ""), query));
61-
// }
62-
//
63-
// private static Expression<Func<Entry, string?, string?, string, bool>> HeadwordSearchValueExpression() =>
64-
// (e, leading, trailing, query) =>
65-
// Json.QueryValues(e.CitationForm).Any(v => SqlHelpers.ContainsIgnoreCaseAccents(v, query))
66-
// || Json.QueryValues(e.LexemeForm).Any(v =>
67-
// SqlHelpers.ContainsIgnoreCaseAccents((leading ?? "") + v + (trailing ?? ""), query));
68-
//
69-
// Then in queries:
42+
// HeadwordSearchValue checks headwords (with morph tokens) across ALL writing systems.
43+
// Use with a MorphTypeData JOIN to get the leading/trailing tokens:
7044
//
7145
// from morphData in morphTypes.LeftJoin(m => (int)m.MorphType == (int)entry.MorphType)
7246
// where entry.HeadwordSearchValue(morphData.LeadingToken, morphData.TrailingToken, query)
7347
// || entry.Senses.Any(s => s.Gloss.SearchValue(query))
7448
//
75-
// And for the FTS table (ToEntrySearchRecord), join MorphTypeData to get tokens:
49+
// For per-WS headword with tokens:
7650
//
77-
// from entry in entries
78-
// from morphData in morphTypes.LeftJoin(m => (int)m.MorphType == (int)entry.MorphType)
79-
// let headword = entry.HeadwordWithTokens(ws.WsId, morphData.LeadingToken, morphData.TrailingToken)
51+
// let headword = entry.HeadwordWithTokens(wsId, morphData.LeadingToken, morphData.TrailingToken)
52+
53+
/// <summary>
/// In-memory form of the headword search: reports whether <paramref name="query"/> is found in
/// the entry's citation form or in any lexeme form once the morph tokens are wrapped around it.
/// The <c>ExpressionMethod</c> attribute names <c>HeadwordSearchValueExpression</c> as the
/// expression counterpart of this method.
/// </summary>
/// <param name="e">Entry whose CitationForm and LexemeForm values are searched.</param>
/// <param name="leading">Token placed before each lexeme form value; null is treated as empty.</param>
/// <param name="trailing">Token placed after each lexeme form value; null is treated as empty.</param>
/// <param name="query">Text to look for.</param>
/// <returns>True when the citation form or at least one token-wrapped lexeme form matches.</returns>
[ExpressionMethod(nameof(HeadwordSearchValueExpression))]
public static bool HeadwordSearchValue(this Entry e, string? leading, string? trailing, string query)
{
    // A citation form hit decides the result on its own; tokens are not applied to it.
    if (e.CitationForm.SearchValue(query))
    {
        return true;
    }

    var prefix = leading ?? "";
    var suffix = trailing ?? "";
    return e.LexemeForm.Values.Any(form =>
        SqlHelpers.ContainsIgnoreCaseAccents(prefix + form.Value + suffix, query));
}
60+
61+
/// <summary>
/// Expression-tree counterpart of <c>HeadwordSearchValue</c>, referenced by name from that
/// method's <c>ExpressionMethod</c> attribute.
/// Evaluates to true when any value read from <c>e.CitationForm</c> via <c>Json.QueryValues</c>
/// satisfies <c>SqlHelpers.ContainsIgnoreCaseAccents</c> for <c>query</c>, or when any value read
/// from <c>e.LexemeForm</c> does so after <c>leading</c> is prepended and <c>trailing</c> is
/// appended (a null token is replaced by an empty string).
/// </summary>
/// <remarks>
/// Citation form values are compared as-is; the tokens are applied to lexeme form values only.
/// NOTE(review): unlike <c>HeadwordWithTokens</c>, the concatenated value is not trimmed here —
/// confirm that is intended.
/// NOTE(review): keep this in step with the body of <c>HeadwordSearchValue</c>; presumably the
/// two are meant to give the same answer in memory and in SQL — verify when changing either.
/// </remarks>
private static Expression<Func<Entry, string?, string?, string, bool>> HeadwordSearchValueExpression() =>
62+
(e, leading, trailing, query) =>
63+
Json.QueryValues(e.CitationForm).Any(v => SqlHelpers.ContainsIgnoreCaseAccents(v, query))
64+
|| Json.QueryValues(e.LexemeForm).Any(v =>
65+
SqlHelpers.ContainsIgnoreCaseAccents((leading ?? "") + v + (trailing ?? ""), query));
8066

8167
/// <summary>
8268
/// Computes headwords for all writing systems present in CitationForm or LexemeForm,

0 commit comments

Comments
 (0)