Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion src/basic_memory/api/v2/routers/knowledge_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,12 @@ async def resolve_identifier(
resolution_method = "external_id" if entity else "search"

# If not found by external_id, try other resolution methods
# Pass source_path for context-aware resolution (prefers notes closer to source)
# Pass strict to control fuzzy search fallback (default False allows fuzzy matching)
if not entity:
entity = await link_resolver.resolve_link(data.identifier)
entity = await link_resolver.resolve_link(
data.identifier, source_path=data.source_path, strict=data.strict
)
if entity:
# Determine resolution method
if entity.permalink == data.identifier:
Expand Down
15 changes: 12 additions & 3 deletions src/basic_memory/repository/entity_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


from loguru import logger
from sqlalchemy import select
from sqlalchemy import select, func
from sqlalchemy.exc import IntegrityError
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
from sqlalchemy.orm import selectinload
Expand Down Expand Up @@ -69,12 +69,21 @@ async def get_by_permalink(self, permalink: str) -> Optional[Entity]:
return await self.find_one(query)

async def get_by_title(self, title: str) -> Sequence[Entity]:
"""Get entity by title.
"""Get entities by title, ordered by shortest path first.

When multiple entities share the same title (in different folders),
returns them ordered by file_path length then alphabetically.
This provides "shortest path" resolution for duplicate titles.

Args:
title: Title of the entity to find
"""
query = self.select().where(Entity.title == title).options(*self.get_load_options())
query = (
self.select()
.where(Entity.title == title)
.order_by(func.length(Entity.file_path), Entity.file_path)
.options(*self.get_load_options())
)
result = await self.execute_query(query)
return list(result.scalars().all())

Expand Down
12 changes: 12 additions & 0 deletions src/basic_memory/schemas/v2/entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ class EntityResolveRequest(BaseModel):
- Permalinks (e.g., "specs/search")
- Titles (e.g., "Search Specification")
- File paths (e.g., "specs/search.md")

When source_path is provided, resolution prefers notes closer to the source
(context-aware resolution for duplicate titles).
"""

identifier: str = Field(
Expand All @@ -23,6 +26,15 @@ class EntityResolveRequest(BaseModel):
min_length=1,
max_length=500,
)
source_path: Optional[str] = Field(
None,
description="Path of the source file containing the link (for context-aware resolution)",
max_length=500,
)
strict: bool = Field(
False,
description="If True, only exact matches are allowed (no fuzzy search fallback)",
)


class EntityResolveResponse(BaseModel):
Expand Down
138 changes: 134 additions & 4 deletions src/basic_memory/services/link_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,81 @@ def __init__(self, entity_repository: EntityRepository, search_service: SearchSe
self.search_service = search_service

async def resolve_link(
self, link_text: str, use_search: bool = True, strict: bool = False
self,
link_text: str,
use_search: bool = True,
strict: bool = False,
source_path: Optional[str] = None,
) -> Optional[Entity]:
"""Resolve a markdown link to a permalink.

Args:
link_text: The link text to resolve
use_search: Whether to use search-based fuzzy matching as fallback
strict: If True, only exact matches are allowed (no fuzzy search fallback)
source_path: Optional path of the source file containing the link.
Used to prefer notes closer to the source (context-aware resolution).
"""
logger.trace(f"Resolving link: {link_text}")
logger.trace(f"Resolving link: {link_text} (source: {source_path})")

# Clean link text and extract any alias
clean_text, alias = self._normalize_link_text(link_text)

# --- Path Resolution ---
# Note: All paths in Basic Memory are stored as POSIX strings (forward slashes)
# for cross-platform compatibility. See entity_repository.py which normalizes
# paths using Path().as_posix(). This allows consistent path operations here.

# --- Relative Path Resolution ---
# Trigger: source_path is provided AND link contains "/"
# Why: Resolve paths like [[nested/deep-note]] relative to source folder first
# Outcome: [[nested/deep-note]] from testing/link-test.md → testing/nested/deep-note.md
if source_path and "/" in clean_text:
source_folder = source_path.rsplit("/", 1)[0] if "/" in source_path else ""
if source_folder:
# Construct relative path from source folder
relative_path = f"{source_folder}/{clean_text}"

# Try with .md extension
if not relative_path.endswith(".md"):
relative_path_md = f"{relative_path}.md"
entity = await self.entity_repository.get_by_file_path(relative_path_md)
if entity:
return entity

# Try as-is (already has extension or is a permalink)
entity = await self.entity_repository.get_by_file_path(relative_path)
if entity:
return entity

# When source_path is provided, use context-aware resolution:
# Check both permalink and title matches, prefer closest to source.
# Example: [[testing]] from folder/note.md prefers folder/testing.md
# over a root testing.md with permalink "testing".
if source_path:
# Gather all potential matches
candidates: list[Entity] = []

# Check permalink match
permalink_entity = await self.entity_repository.get_by_permalink(clean_text)
if permalink_entity:
candidates.append(permalink_entity)

# Check title matches
title_entities = await self.entity_repository.get_by_title(clean_text)
for entity in title_entities:
# Avoid duplicates (permalink match might also be in title matches)
if entity.id not in [c.id for c in candidates]:
candidates.append(entity)

if candidates:
if len(candidates) == 1:
return candidates[0]
else:
# Multiple candidates - pick closest to source
return self._find_closest_entity(candidates, source_path)

# Standard resolution (no source context): permalink first, then title
# 1. Try exact permalink match first (most efficient)
entity = await self.entity_repository.get_by_permalink(clean_text)
if entity:
Expand All @@ -51,7 +112,7 @@ async def resolve_link(
# 2. Try exact title match
found = await self.entity_repository.get_by_title(clean_text)
if found:
# Return first match if there are duplicates (consistent behavior)
# Return first match (shortest path) if no source context
entity = found[0]
logger.debug(f"Found title match: {entity.title}")
return entity
Expand Down Expand Up @@ -108,7 +169,7 @@ def _normalize_link_text(self, link_text: str) -> Tuple[str, Optional[str]]:
if text.startswith("[[") and text.endswith("]]"):
text = text[2:-2]

# Handle Obsidian-style aliases (format: [[actual|alias]])
# Handle wiki link aliases (format: [[actual|alias]])
alias = None
if "|" in text:
text, alias = text.split("|", 1)
Expand All @@ -119,3 +180,72 @@ def _normalize_link_text(self, link_text: str) -> Tuple[str, Optional[str]]:
text = text.strip()

return text, alias

def _find_closest_entity(self, entities: list[Entity], source_path: str) -> Entity:
"""Find the entity closest to the source file path.

Context-aware resolution: prefer notes in the same folder or closer in hierarchy.

Proximity Scoring Algorithm:
- Priority 0: Same folder as source (best match)
- Priority 1-N: Ancestor folders (N = levels up from source)
- Priority 100+N: Descendant folders (N = levels down, deprioritized)
- Priority 1000: Completely unrelated paths (least preferred)
- Ties are broken by shortest absolute path (consistent behavior)

Args:
entities: List of entities with the same title
source_path: Path of the file containing the link

Returns:
The entity closest to the source path
"""
# Extract source folder (everything before the last /)
source_folder = source_path.rsplit("/", 1)[0] if "/" in source_path else ""

def path_proximity(entity: Entity) -> Tuple[int, int]:
"""Return (proximity_score, path_length) for sorting.

Lower is better for both values.
"""
entity_path = entity.file_path
entity_folder = entity_path.rsplit("/", 1)[0] if "/" in entity_path else ""

# Trigger: entity is in the same folder as source
# Why: same-folder notes are most contextually relevant
# Outcome: priority = 0 (best), ties broken by shortest path
if entity_folder == source_folder:
return (0, len(entity_path))

# Trigger: entity is in an ancestor folder of source
# e.g., source is "a/b/c/file.md", entity is "a/b/note.md" -> ancestor
# Why: ancestors are contextually relevant (shared parent context)
# Outcome: priority = levels_up (1, 2, 3...), closer ancestors preferred
if source_folder.startswith(entity_folder + "/") if entity_folder else source_folder:
# Count how many levels up
if entity_folder:
levels_up = source_folder.count("/") - entity_folder.count("/")
else:
# Root level
levels_up = source_folder.count("/") + 1
return (levels_up, len(entity_path))

# Trigger: entity is in a descendant folder of source
# e.g., source is "a/file.md", entity is "a/b/c/note.md" -> descendant
# Why: descendants are less contextually relevant than ancestors
# Outcome: priority = 100 + levels_down, significantly deprioritized
if entity_folder.startswith(source_folder + "/") if source_folder else entity_folder:
if source_folder:
levels_down = entity_folder.count("/") - source_folder.count("/")
else:
# Source is at root
levels_down = entity_folder.count("/") + 1
return (100 + levels_down, len(entity_path))

# Trigger: entity is in a completely unrelated path
# Why: no folder relationship means minimal contextual relevance
# Outcome: priority = 1000, only selected if no related paths exist
return (1000, len(entity_path))

# Sort by proximity (lower is better), then by path length (shorter is better)
return min(entities, key=path_proximity)
54 changes: 54 additions & 0 deletions tests/api/v2/test_knowledge_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,60 @@ async def test_resolve_identifier_not_found(client: AsyncClient, v2_project_url)
assert "Entity not found" in response.json()["detail"]


@pytest.mark.asyncio
async def test_resolve_identifier_no_fuzzy_match(client: AsyncClient, v2_project_url):
    """Test that strict resolution never falls back to fuzzy search.

    Wiki links resolved in strict mode must only match exactly (permalink,
    title, or path), not similar-sounding entities found via fuzzy search.
    The request sets ``strict`` explicitly so the fuzzy fallback is disabled
    regardless of the request schema's default.
    """
    # Create an entity with a specific name
    entity_data = {
        "title": "link-test",
        "folder": "testing",
        "content": "A test note",
    }
    response = await client.post(f"{v2_project_url}/knowledge/entities", json=entity_data)
    assert response.status_code == 200

    # Try to resolve "nonexistent" - should NOT fuzzy match to "link-test"
    resolve_data = {"identifier": "nonexistent", "strict": True}
    response = await client.post(f"{v2_project_url}/knowledge/resolve", json=resolve_data)

    # Must return 404, not a fuzzy match to "link-test"
    assert response.status_code == 404
    assert "Entity not found" in response.json()["detail"]


@pytest.mark.asyncio
async def test_resolve_identifier_with_source_path_no_fuzzy_match(
    client: AsyncClient, v2_project_url
):
    """Test that strict mode also applies to context-aware resolution.

    Even when ``source_path`` is supplied for context-aware resolution, a
    nonexistent link must return 404 rather than fuzzy match a nearby entity.
    The request sets ``strict`` explicitly so the fuzzy fallback is disabled
    regardless of the request schema's default.
    """
    # Create an entity inside a nested folder structure
    entity_data = {
        "title": "link-test",
        "folder": "testing/nested",
        "content": "A nested test note",
    }
    response = await client.post(f"{v2_project_url}/knowledge/entities", json=entity_data)
    assert response.status_code == 200

    # Try to resolve "nonexistent" with source_path context
    # Should NOT fuzzy match to "link-test" in the same or nearby folder
    resolve_data = {
        "identifier": "nonexistent",
        "source_path": "testing/nested/other-note.md",
        "strict": True,
    }
    response = await client.post(f"{v2_project_url}/knowledge/resolve", json=resolve_data)

    # Must return 404, not a fuzzy match
    assert response.status_code == 404
    assert "Entity not found" in response.json()["detail"]


@pytest.mark.asyncio
async def test_get_entity_by_id(client: AsyncClient, test_graph, v2_project_url, entity_repository):
"""Test getting an entity by its external_id (UUID)."""
Expand Down
60 changes: 60 additions & 0 deletions tests/repository/test_entity_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,66 @@ async def test_get_by_title(entity_repository: EntityRepository, session_maker):
assert len(found) == 2


@pytest.mark.asyncio
async def test_get_by_title_returns_shortest_path_first(
    entity_repository: EntityRepository, session_maker
):
    """Duplicate titles come back ordered from shortest file path to longest.

    Three entities share the title "My Note" but live at different depths.
    ``get_by_title`` must return all of them, with the shortest ``file_path``
    first, so link resolution over duplicate titles stays predictable.
    """
    # (permalink, file_path) pairs, listed longest path first so that a passing
    # test cannot be explained by insertion order.
    locations = [
        ("archive/old/2024/my-note", "archive/old/2024/My Note.md"),
        ("docs/my-note", "docs/My Note.md"),
        ("my-note", "My Note.md"),
    ]

    async with db.scoped_session(session_maker) as session:
        session.add_all(
            [
                Entity(
                    project_id=entity_repository.project_id,
                    title="My Note",
                    entity_type="note",
                    permalink=permalink,
                    file_path=file_path,
                    content_type="text/markdown",
                    created_at=datetime.now(timezone.utc),
                    updated_at=datetime.now(timezone.utc),
                )
                for permalink, file_path in locations
            ]
        )
        await session.flush()

    matches = await entity_repository.get_by_title("My Note")

    # Every duplicate is returned...
    assert len(matches) == 3

    # ...ordered by path length, shortest first.
    assert [match.file_path for match in matches] == [
        "My Note.md",
        "docs/My Note.md",
        "archive/old/2024/My Note.md",
    ]


@pytest.mark.asyncio
async def test_get_by_file_path(entity_repository: EntityRepository, session_maker):
"""Test getting an entity by title."""
Expand Down
Loading
Loading