From d971f6e5611a5097526f41cd542d0e4f1e8a1de0 Mon Sep 17 00:00:00 2001 From: Tim Waugh Date: Mon, 16 Mar 2026 10:04:36 +0000 Subject: [PATCH] Include full pages as search results using block centroids Compute page embeddings as the normalized centroid of all block embeddings for each page (skipping pages with fewer than 2 blocks). Page results are merged with block results by similarity score. - New IndexedDB store for page embeddings (DB version 2) - Page results show a "Page" badge and a preview of the page's top 3 blocks most similar to the query - Copy button produces [[page name]] for pages, ((block-id)) for blocks - Click navigates to page, shift+click opens in sidebar - Journal checkbox filters both block and page results Co-Authored-By: Claude Opus 4.6 --- src/__tests__/search.test.ts | 31 +++++- src/__tests__/storage.test.ts | 25 +++++ src/indexer.ts | 63 +++++++++++- src/main.ts | 3 +- src/search.ts | 31 +++++- src/storage.ts | 56 ++++++++++- src/ui.ts | 176 ++++++++++++++++++++++++++-------- styles/search-modal.css | 12 +++ 8 files changed, 350 insertions(+), 47 deletions(-) diff --git a/src/__tests__/search.test.ts b/src/__tests__/search.test.ts index 9917c73..9eff308 100644 --- a/src/__tests__/search.test.ts +++ b/src/__tests__/search.test.ts @@ -1,6 +1,6 @@ import { describe, it, expect } from "vitest"; -import { dotProduct, searchEmbeddings } from "../search"; -import type { EmbeddingRecord } from "../storage"; +import { dotProduct, searchEmbeddings, searchPageEmbeddings } from "../search"; +import type { EmbeddingRecord, PageEmbeddingRecord } from "../storage"; describe("dotProduct", () => { it("computes dot product correctly", () => { @@ -53,3 +53,30 @@ describe("searchEmbeddings", () => { expect(results).toHaveLength(1); }); }); + +describe("searchPageEmbeddings", () => { + const pages: PageEmbeddingRecord[] = [ + { pageId: 1, pageName: "Page A", embedding: [1, 0, 0], isJournal: false, blockCount: 5, timestamp: 0 }, + { pageId: 2, pageName: "Page B", embedding: [0, 1, 0], 
isJournal: true, blockCount: 3, timestamp: 0 }, + { pageId: 3, pageName: "Page C", embedding: [0.7, 0.7, 0], isJournal: false, blockCount: 2, timestamp: 0 }, + ]; + + it("returns page results sorted by similarity", () => { + const results = searchPageEmbeddings([1, 0, 0], pages, 10, 0); + expect(results[0].pageName).toBe("Page A"); + expect(results[0].similarity).toBe(1); + expect(results[1].pageName).toBe("Page C"); + }); + + it("includes isJournal flag", () => { + const results = searchPageEmbeddings([0, 1, 0], pages, 10, 0); + expect(results[0].pageName).toBe("Page B"); + expect(results[0].isJournal).toBe(true); + }); + + it("respects topK and threshold", () => { + const results = searchPageEmbeddings([1, 0, 0], pages, 1, 0.5); + expect(results).toHaveLength(1); + expect(results[0].pageName).toBe("Page A"); + }); +}); diff --git a/src/__tests__/storage.test.ts b/src/__tests__/storage.test.ts index 481cad0..adbb620 100644 --- a/src/__tests__/storage.test.ts +++ b/src/__tests__/storage.test.ts @@ -6,6 +6,9 @@ import { getAllEmbeddings, deleteEmbeddings, clearAllEmbeddings, + putPageEmbeddings, + getAllPageEmbeddings, + clearAllPageEmbeddings, getMetadata, setMetadata, getEmbeddingCount, @@ -82,6 +85,28 @@ describe("embeddings CRUD", () => { }); }); +describe("page embeddings CRUD", () => { + it("stores and retrieves page embeddings", async () => { + await putPageEmbeddings([ + { pageId: 1, pageName: "Page A", embedding: [0.1, 0.2], isJournal: false, blockCount: 5, timestamp: Date.now() }, + { pageId: 2, pageName: "Page B", embedding: [0.3, 0.4], isJournal: true, blockCount: 3, timestamp: Date.now() }, + ]); + const all = await getAllPageEmbeddings(); + expect(all).toHaveLength(2); + expect(all[0].pageName).toBe("Page A"); + expect(all[1].isJournal).toBe(true); + }); + + it("clears all page embeddings", async () => { + await putPageEmbeddings([ + { pageId: 1, pageName: "Page A", embedding: [0.1], isJournal: false, blockCount: 2, timestamp: 0 }, + ]); + await 
clearAllPageEmbeddings(); + const all = await getAllPageEmbeddings(); + expect(all).toHaveLength(0); + }); +}); + describe("metadata", () => { it("stores and retrieves metadata", async () => { await setMetadata("model", "nomic-embed-text"); diff --git a/src/indexer.ts b/src/indexer.ts index 852e31a..6eb36d1 100644 --- a/src/indexer.ts +++ b/src/indexer.ts @@ -6,10 +6,13 @@ import { getAllEmbeddings, deleteEmbeddings, clearAllEmbeddings, + clearAllPageEmbeddings, + putPageEmbeddings, getMetadata, setMetadata, getEmbeddingCount, } from "./storage"; +import type { PageEmbeddingRecord } from "./storage"; import { getSettings } from "./settings"; export interface IndexingState { @@ -66,6 +69,7 @@ interface PageResult { "original-name"?: string; properties?: Record; "updated-at"?: number; + "journal?": boolean; } const SCHEMA_VERSION = 4; @@ -74,6 +78,7 @@ interface PageInfo { name: string; properties: Record; updatedAt: number; + isJournal: boolean; } interface BlockInfo { @@ -145,6 +150,7 @@ export async function indexBlocks( const storedSchema = await getMetadata("schemaVersion"); if (!storedSchema || (storedSchema as number) < SCHEMA_VERSION) { await clearAllEmbeddings(); + await clearAllPageEmbeddings(); logseq.UI.showMsg("Embedding format changed, re-indexing all blocks..."); await setMetadata("schemaVersion", SCHEMA_VERSION); } @@ -153,6 +159,7 @@ export async function indexBlocks( const storedModel = await getMetadata("model"); if (storedModel && storedModel !== settings.embeddingModel) { await clearAllEmbeddings(); + await clearAllPageEmbeddings(); logseq.UI.showMsg("Model changed, re-indexing all blocks..."); } await setMetadata("model", settings.embeddingModel); @@ -163,7 +170,7 @@ export async function indexBlocks( const blockResults: BlockResult[][] = await logseq.DB.datascriptQuery(blockQuery); // Bulk-fetch all pages - const pageQuery = `[:find (pull ?p [:db/id :block/name :block/original-name :block/properties :block/updated-at]) + const pageQuery = 
`[:find (pull ?p [:db/id :block/name :block/original-name :block/properties :block/updated-at :block/journal?]) :where [?p :block/name _]]`; const pageResults: PageResult[][] = await logseq.DB.datascriptQuery(pageQuery); @@ -193,6 +200,7 @@ export async function indexBlocks( name: page.originalName ?? page["original-name"] ?? page.name ?? "", properties: page.properties ?? {}, updatedAt: page["updated-at"] ?? 0, + isJournal: page["journal?"] ?? false, }); } @@ -364,6 +372,59 @@ export async function indexBlocks( await deleteEmbeddings(staleIds); } + // Compute page centroids + if (!abort.signal.aborted) { + const allEmbs = staleIds.length > 0 ? await getAllEmbeddings() : allExisting; + const pageGroups = new Map(); + for (const emb of allEmbs) { + let group = pageGroups.get(emb.pageId); + if (!group) { + group = []; + pageGroups.set(emb.pageId, group); + } + group.push(emb.embedding); + } + + const pageRecords: PageEmbeddingRecord[] = []; + for (const [pageId, embeddings] of pageGroups) { + if (embeddings.length < 2) continue; + const pageInfo = pageMap.get(pageId); + if (!pageInfo) continue; + + const dim = embeddings[0].length; + const centroid = new Array(dim).fill(0); + for (const emb of embeddings) { + for (let i = 0; i < dim; i++) { + centroid[i] += emb[i]; + } + } + let norm = 0; + for (let i = 0; i < dim; i++) { + centroid[i] /= embeddings.length; + norm += centroid[i] * centroid[i]; + } + norm = Math.sqrt(norm); + if (norm > 0) { + for (let i = 0; i < dim; i++) { + centroid[i] /= norm; + } + } + + pageRecords.push({ + pageId, + pageName: pageInfo.name, + embedding: centroid, + isJournal: pageInfo.isJournal, + blockCount: embeddings.length, + timestamp: Date.now(), + }); + } + + if (pageRecords.length > 0) { + await putPageEmbeddings(pageRecords); + } + } + const count = await getEmbeddingCount(); await setMetadata("blockCount", count); await setMetadata("lastIndexed", Date.now()); diff --git a/src/main.ts b/src/main.ts index 1820f87..e4ec35c 100644 --- 
a/src/main.ts +++ b/src/main.ts @@ -2,7 +2,7 @@ import "@logseq/libs"; import { settingsSchema } from "./settings"; import { getSettings } from "./settings"; import { indexBlocks } from "./indexer"; -import { setGraphName, clearAllEmbeddings } from "./storage"; +import { setGraphName, clearAllEmbeddings, clearAllPageEmbeddings } from "./storage"; import { createSearchModal, showModal } from "./ui"; async function main() { @@ -45,6 +45,7 @@ async function main() { // Register rebuild command logseq.App.registerCommandPalette({ key: "rebuild-index", label: "Semantic Search: Rebuild index" }, async () => { await clearAllEmbeddings(); + await clearAllPageEmbeddings(); logseq.UI.showMsg("Rebuilding index..."); try { await indexBlocks(); diff --git a/src/search.ts b/src/search.ts index 6714a97..67cab8c 100644 --- a/src/search.ts +++ b/src/search.ts @@ -1,4 +1,4 @@ -import type { EmbeddingRecord } from "./storage"; +import type { EmbeddingRecord, PageEmbeddingRecord } from "./storage"; export interface SearchResult { blockId: string; @@ -6,6 +6,13 @@ export interface SearchResult { similarity: number; } +export interface PageSearchResult { + pageId: number; + pageName: string; + isJournal: boolean; + similarity: number; +} + export function dotProduct(a: number[], b: number[]): number { let sum = 0; for (let i = 0; i < a.length; i++) { @@ -34,3 +41,25 @@ export function searchEmbeddings( scored.sort((a, b) => b.similarity - a.similarity); return scored.slice(0, topK); } + +export function searchPageEmbeddings( + queryEmbedding: number[], + records: PageEmbeddingRecord[], + topK: number, + threshold = 0.3, +): PageSearchResult[] { + const scored: PageSearchResult[] = []; + for (const record of records) { + const similarity = dotProduct(queryEmbedding, record.embedding); + if (similarity >= threshold) { + scored.push({ + pageId: record.pageId, + pageName: record.pageName, + isJournal: record.isJournal, + similarity, + }); + } + } + scored.sort((a, b) => b.similarity - 
a.similarity); + return scored.slice(0, topK); +} diff --git a/src/storage.ts b/src/storage.ts index 75e1812..fd8c045 100644 --- a/src/storage.ts +++ b/src/storage.ts @@ -1,9 +1,11 @@ const DB_PREFIX = "semantic-search-embeddings"; -const DB_VERSION = 1; +const DB_VERSION = 2; const EMBEDDINGS_STORE = "embeddings"; const METADATA_STORE = "metadata"; +const PAGE_EMBEDDINGS_STORE = "pageEmbeddings"; let graphName = ""; +let pageCache: PageEmbeddingRecord[] | null = null; export function setGraphName(name: string): void { graphName = name; @@ -22,6 +24,15 @@ export interface EmbeddingRecord { pageUpdatedAt: number; } +export interface PageEmbeddingRecord { + pageId: number; + pageName: string; + embedding: number[]; + isJournal: boolean; + blockCount: number; + timestamp: number; +} + export interface MetadataRecord { key: string; value: string | number; @@ -38,6 +49,9 @@ function openDB(): Promise { if (!db.objectStoreNames.contains(METADATA_STORE)) { db.createObjectStore(METADATA_STORE, { keyPath: "key" }); } + if (!db.objectStoreNames.contains(PAGE_EMBEDDINGS_STORE)) { + db.createObjectStore(PAGE_EMBEDDINGS_STORE, { keyPath: "pageId" }); + } }; request.onsuccess = () => resolve(request.result); request.onerror = () => reject(request.error); @@ -135,6 +149,46 @@ export async function clearAllEmbeddings(): Promise { }); } +export async function putPageEmbeddings(records: PageEmbeddingRecord[]): Promise { + const db = await openDB(); + return new Promise((resolve, reject) => { + const tx = db.transaction(PAGE_EMBEDDINGS_STORE, "readwrite"); + const store = tx.objectStore(PAGE_EMBEDDINGS_STORE); + for (const record of records) { + store.put(record); + } + tx.oncomplete = () => { db.close(); pageCache = null; resolve(); }; + tx.onerror = () => { db.close(); reject(tx.error); }; + }); +} + +export async function getAllPageEmbeddings(): Promise { + if (pageCache) return pageCache; + const db = await openDB(); + return new Promise((resolve, reject) => { + const tx = 
db.transaction(PAGE_EMBEDDINGS_STORE, "readonly"); + const store = tx.objectStore(PAGE_EMBEDDINGS_STORE); + const req = store.getAll(); + req.onsuccess = () => { + pageCache = req.result; + resolve(pageCache); + }; + req.onerror = () => reject(req.error); + tx.oncomplete = () => db.close(); + }); +} + +export async function clearAllPageEmbeddings(): Promise { + const db = await openDB(); + return new Promise((resolve, reject) => { + const tx = db.transaction(PAGE_EMBEDDINGS_STORE, "readwrite"); + const store = tx.objectStore(PAGE_EMBEDDINGS_STORE); + store.clear(); + tx.oncomplete = () => { db.close(); pageCache = []; resolve(); }; + tx.onerror = () => { db.close(); reject(tx.error); }; + }); +} + export async function getEmbeddingCount(): Promise { const db = await openDB(); return new Promise((resolve, reject) => { diff --git a/src/ui.ts b/src/ui.ts index e0dc7a8..80224a6 100644 --- a/src/ui.ts +++ b/src/ui.ts @@ -1,11 +1,15 @@ import { debounce } from "./utils"; import { embedTexts } from "./embeddings"; -import { getAllEmbeddings, getEmbeddingCount } from "./storage"; -import { searchEmbeddings, type SearchResult } from "./search"; +import { getAllEmbeddings, getAllPageEmbeddings, getEmbeddingCount } from "./storage"; +import { dotProduct, searchEmbeddings, searchPageEmbeddings } from "./search"; import { indexBlocks, indexingState, acquireSearchPriority, releaseSearchPriority } from "./indexer"; import { getSettings } from "./settings"; -interface DisplayResult extends SearchResult { +interface DisplayResult { + type: "block" | "page"; + blockId: string; + pageId: number; + similarity: number; pageName: string; content: string; isJournal: boolean; @@ -167,8 +171,14 @@ function handleKeydown(e: KeyboardEvent): void { ); } else if (e.key === "c" && (e.ctrlKey || e.metaKey) && active) { e.preventDefault(); - const blockId = active.getAttribute("data-block-id"); - if (blockId) copyBlockReference(blockId); + const type = active.getAttribute("data-type") as "block" 
| "page"; + if (type === "page") { + const pageName = active.getAttribute("data-page-name"); + if (pageName) copyReference("page", pageName); + } else { + const blockId = active.getAttribute("data-block-id"); + if (blockId) copyReference("block", blockId); + } } } @@ -246,16 +256,24 @@ async function performSearch(query: string): Promise { releaseSearchPriority(); } - const allEmbeddings = await getAllEmbeddings(); - const results = searchEmbeddings( + const [allEmbeddings, allPageEmbeddings] = await Promise.all([ + getAllEmbeddings(), + getAllPageEmbeddings(), + ]); + const blockResults = searchEmbeddings( queryEmbedding, allEmbeddings, settings.topK, ); + const pageResults = searchPageEmbeddings( + queryEmbedding, + allPageEmbeddings, + settings.topK, + ); // Fetch block details const displayResults: DisplayResult[] = []; - for (const result of results) { + for (const result of blockResults) { try { const block = await logseq.Editor.getBlock(result.blockId); if (!block) continue; @@ -295,6 +313,7 @@ async function performSearch(query: string): Promise { breadcrumbs.push(...ancestors); displayResults.push({ + type: "block", ...result, pageName, content: block.content ?? "", @@ -306,7 +325,46 @@ async function performSearch(query: string): Promise { } } - lastDisplayResults = displayResults; + // Fetch page previews from most relevant blocks + for (const result of pageResults) { + try { + // Find the top 3 most similar blocks in this page + const pageBlocks = allEmbeddings + .filter((e) => e.pageId === result.pageId) + .map((e) => ({ blockId: e.blockId, similarity: dotProduct(queryEmbedding, e.embedding) })) + .sort((a, b) => b.similarity - a.similarity) + .slice(0, 3); + + const previewLines: string[] = []; + for (const pb of pageBlocks) { + try { + const block = await logseq.Editor.getBlock(pb.blockId); + if (block?.content) { + const line = block.content.split("\n")[0]; + previewLines.push(line.length > 80 ? line.slice(0, 80) + "..." 
: line); + } + } catch { /* skip */ } + } + + displayResults.push({ + type: "page", + blockId: "", + pageId: result.pageId, + similarity: result.similarity, + pageName: result.pageName, + content: previewLines.join(" · "), + isJournal: result.isJournal, + breadcrumbs: [result.pageName], + }); + } catch { + // Skip pages we can't fetch + } + } + + // Sort merged results by similarity + displayResults.sort((a, b) => b.similarity - a.similarity); + // Trim to topK + lastDisplayResults = displayResults.slice(0, settings.topK); renderFilteredResults(); } catch (err) { resultsEl.innerHTML = `
${(err as Error).message}
`; @@ -325,8 +383,13 @@ function renderResults(results: DisplayResult[]): void { resultsEl.innerHTML = ""; for (const result of results) { const item = document.createElement("div"); - item.className = "ss-result-item"; - item.setAttribute("data-block-id", result.blockId); + item.className = result.type === "page" ? "ss-result-item ss-page-result" : "ss-result-item"; + item.setAttribute("data-type", result.type); + if (result.type === "block") { + item.setAttribute("data-block-id", result.blockId); + } else { + item.setAttribute("data-page-name", result.pageName); + } const similarity = Math.round(result.similarity * 100); const preview = @@ -334,44 +397,73 @@ function renderResults(results: DisplayResult[]): void { ? result.content.slice(0, 150) + "..." : result.content; - const breadcrumbHtml = result.breadcrumbs - .map((b) => `${escapeHtml(b)}`) - .join(''); + const copyTitle = result.type === "page" + ? "Copy page reference (Ctrl+C)" + : "Copy block reference (Ctrl+C)"; - item.innerHTML = ` -
- ${similarity}% - ${breadcrumbHtml} -
-
${escapeHtml(preview)}
- - `; + if (result.type === "page") { + item.innerHTML = ` +
+ ${similarity}% + Page + ${escapeHtml(result.pageName)} +
+
${escapeHtml(preview)}
+ + `; + } else { + const breadcrumbHtml = result.breadcrumbs + .map((b) => `${escapeHtml(b)}`) + .join(''); + + item.innerHTML = ` +
+ ${similarity}% + ${breadcrumbHtml} +
+
${escapeHtml(preview)}
+ + `; + } const refBtn = item.querySelector(".ss-ref-btn")!; refBtn.addEventListener("click", (e) => { e.stopPropagation(); - copyBlockReference(result.blockId); + if (result.type === "page") { + copyReference("page", result.pageName); + } else { + copyReference("block", result.blockId); + } }); item.addEventListener("click", async (e) => { - if (e.shiftKey) { + if (result.type === "page") { + if (e.shiftKey) { + try { + const page = await logseq.Editor.getPage(result.pageName); + if (page) logseq.Editor.openInRightSidebar(page.uuid); + } catch { /* ignore */ } + return; + } try { - logseq.Editor.openInRightSidebar(result.blockId); - } catch { - // ignore sidebar errors + logseq.App.pushState("page", { name: result.pageName }); + } catch { /* ignore */ } + } else { + if (e.shiftKey) { + try { + logseq.Editor.openInRightSidebar(result.blockId); + } catch { /* ignore */ } + return; } - return; - } - try { - const block = await logseq.Editor.getBlock(result.blockId); - if (block?.page?.id) { - const page = await logseq.Editor.getPage(block.page.id); - if (page?.name) { - logseq.Editor.scrollToBlockInPage(page.name, result.blockId); + try { + const block = await logseq.Editor.getBlock(result.blockId); + if (block?.page?.id) { + const page = await logseq.Editor.getPage(block.page.id); + if (page?.name) { + logseq.Editor.scrollToBlockInPage(page.name, result.blockId); + } } - } - } catch { - // ignore navigation errors + } catch { /* ignore */ } } hideModal(); }); @@ -395,9 +487,11 @@ function clearResults(): void { lastDisplayResults = []; } -function copyBlockReference(blockId: string): void { - navigator.clipboard.writeText(`((${blockId}))`).then(() => { - logseq.UI.showMsg("Block reference copied to clipboard"); +function copyReference(type: "block" | "page", id: string): void { + const text = type === "page" ? `[[${id}]]` : `((${id}))`; + const label = type === "page" ? 
"Page" : "Block"; + navigator.clipboard.writeText(text).then(() => { + logseq.UI.showMsg(`${label} reference copied to clipboard`); }); } diff --git a/styles/search-modal.css b/styles/search-modal.css index 15d5d80..47466bd 100644 --- a/styles/search-modal.css +++ b/styles/search-modal.css @@ -146,6 +146,18 @@ white-space: nowrap; } +.ss-page-label { + font-size: 10px; + font-weight: 600; + text-transform: uppercase; + letter-spacing: 0.5px; + padding: 1px 5px; + border-radius: 3px; + background: var(--ls-link-text-color, #4a9eff); + color: #fff; + flex-shrink: 0; +} + .ss-breadcrumb-sep { margin: 0 4px; opacity: 0.6;