Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 56 additions & 3 deletions content/extract-text.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,56 @@
/**
 * Normalises whitespace in `text`: leading and trailing whitespace is
 * dropped and every internal run of whitespace becomes one space.
 */
function clean(text) {
  const words = text.trim().split(/\s+/);
  return words.join(' ');
}
// Local names of elements that are never rendered. For an element that is
// not being rendered, innerText returns the same value as textContent, so
// reading it from a <style> or <script> sitting directly inside a shadow root
// would leak raw CSS / JS source into the extracted text.
const NON_RENDERED_TAGS = new Set(['style', 'script', 'template', 'noscript']);

/**
 * Recursively collects visible text from shadow DOM subtrees.
 *
 * Extensions such as Monica.im and Cline inject their UI into open shadow
 * roots, which are invisible to a regular element's innerText. Walking the
 * shadow tree explicitly lets us surface that text for summarise / agent
 * workflows.
 *
 * Only roots reachable through `el.shadowRoot` are visited. Direct children
 * of a shadow root that are <style>, <script>, <template> or <noscript> are
 * skipped so stylesheet / script source never ends up in the result.
 *
 * @param root Element or ShadowRoot whose descendants are scanned for hosts.
 * @returns The collected text fragments joined with '\n', or '' when no
 *   shadow root under `root` contributes any non-blank text.
 */
export function collectShadowText(root) {
  const parts = [];
  for (const el of Array.from(root.querySelectorAll('*'))) {
    const shadow = el.shadowRoot;
    if (!shadow) {
      continue;
    }
    // ShadowRoot has no innerText; collect from its direct Element children.
    for (const child of Array.from(shadow.children)) {
      if (!(child instanceof HTMLElement)) {
        continue;
      }
      if (NON_RENDERED_TAGS.has(child.localName)) {
        // Not rendered: innerText would hand back raw CSS / JS source.
        continue;
      }
      const t = child.innerText;
      if (t && t.trim()) {
        parts.push(t);
      }
    }
    // querySelectorAll does not pierce shadow boundaries, so recurse to reach
    // hosts nested inside this shadow root.
    const nested = collectShadowText(shadow);
    if (nested) {
      parts.push(nested);
    }
  }
  return parts.join('\n');
}
export function extractSelection() {
const selection = window.getSelection();
const text = selection ? selection.toString() : '';
let text = selection ? selection.toString() : '';
const sel = selection;
if (!text && sel && typeof sel.getComposedRanges === 'function') {
const composed = sel.getComposedRanges(document);
if (composed.length > 0 && !composed[0].collapsed) {
try {
const sr = composed[0];
const liveRange = document.createRange();
liveRange.setStart(sr.startContainer, sr.startOffset);
liveRange.setEnd(sr.endContainer, sr.endOffset);
text = liveRange.toString();
}
catch {
// startContainer / endContainer live inside a shadow root that
// document.createRange cannot span; leave text empty and rely on the
// caller to use extractDocumentText() as a fallback.
}
}
}
return {
text: clean(text),
url: location.href,
Expand All @@ -13,9 +60,15 @@ export function extractSelection() {
export function extractDocumentText() {
const article = document.querySelector('article');
const target = article || document.body;
const text = clean(target.innerText || '');
const parts = [target.innerText || ''];
// Append text from shadow DOM subtrees so content rendered by extensions
// such as Monica.im and Cline (which use shadow roots) is included.
const shadowText = collectShadowText(target);
if (shadowText) {
parts.push(shadowText);
}
return {
text,
text: clean(parts.join('\n')),
url: location.href,
title: document.title
};
Expand Down
70 changes: 67 additions & 3 deletions content/extract-text.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,65 @@ function clean(text: string): string {
return text.replace(/\s+/g, ' ').trim();
}

// Local names of elements that are never rendered. For an element that is
// not being rendered, innerText returns the same value as textContent, so
// reading it from a <style> or <script> sitting directly inside a shadow root
// would leak raw CSS / JS source into the extracted text.
const NON_RENDERED_TAGS: ReadonlySet<string> = new Set(['style', 'script', 'template', 'noscript']);

/**
 * Recursively collects visible text from shadow DOM subtrees.
 *
 * Extensions such as Monica.im and Cline inject their UI into open shadow
 * roots, which are invisible to a regular element's innerText. Walking the
 * shadow tree explicitly lets us surface that text for summarise / agent
 * workflows.
 *
 * Only roots reachable through `el.shadowRoot` are visited. Direct children
 * of a shadow root that are `<style>`, `<script>`, `<template>` or
 * `<noscript>` are skipped so stylesheet / script source never ends up in the
 * result.
 *
 * @param root - Element or ShadowRoot whose descendants are scanned for hosts.
 * @returns The collected text fragments joined with `'\n'`, or `''` when no
 *   shadow root under `root` contributes any non-blank text.
 */
export function collectShadowText(root: Element | ShadowRoot): string {
  const parts: string[] = [];
  for (const el of Array.from(root.querySelectorAll('*'))) {
    const shadow = el.shadowRoot;
    if (!shadow) {
      continue;
    }
    // ShadowRoot has no innerText; collect from its direct Element children.
    for (const child of Array.from(shadow.children)) {
      if (!(child instanceof HTMLElement)) {
        continue;
      }
      if (NON_RENDERED_TAGS.has(child.localName)) {
        // Not rendered: innerText would hand back raw CSS / JS source.
        continue;
      }
      const t = child.innerText;
      if (t && t.trim()) {
        parts.push(t);
      }
    }
    // querySelectorAll does not pierce shadow boundaries, so recurse to reach
    // hosts nested inside this shadow root.
    const nested = collectShadowText(shadow);
    if (nested) {
      parts.push(nested);
    }
  }
  return parts.join('\n');
}

export function extractSelection(): TextExtraction {
const selection = window.getSelection();
const text = selection ? selection.toString() : '';
let text = selection ? selection.toString() : '';

// Fallback for shadow-DOM selections. Chrome 111+ exposes getComposedRanges()
// which crosses shadow-root boundaries. Extensions like Monica.im and Cline
// render their chat UI inside open shadow roots; on some configurations
// selection.toString() returns an empty string even though a non-collapsed
// composed range exists. When that happens we attempt a live-range conversion
// so the selection text is not silently dropped.
type SelectionWithComposed = Selection & { getComposedRanges?: (...args: unknown[]) => StaticRange[] };
const sel = selection as SelectionWithComposed | null;
if (!text && sel && typeof sel.getComposedRanges === 'function') {
const composed = sel.getComposedRanges(document);
if (composed.length > 0 && !composed[0].collapsed) {
try {
const sr = composed[0];
const liveRange = document.createRange();
liveRange.setStart(sr.startContainer, sr.startOffset);
liveRange.setEnd(sr.endContainer, sr.endOffset);
text = liveRange.toString();
} catch {
// startContainer / endContainer live inside a shadow root that
// document.createRange cannot span; leave text empty and rely on the
// caller to use extractDocumentText() as a fallback.
}
}
}

return {
text: clean(text),
url: location.href,
Expand All @@ -21,9 +77,17 @@ export function extractSelection(): TextExtraction {
export function extractDocumentText(): TextExtraction {
const article = document.querySelector('article');
const target = article || document.body;
const text = clean(target.innerText || '');
const parts: string[] = [target.innerText || ''];

// Append text from shadow DOM subtrees so content rendered by extensions
// such as Monica.im and Cline (which use shadow roots) is included.
const shadowText = collectShadowText(target);
if (shadowText) {
parts.push(shadowText);
}

return {
text,
text: clean(parts.join('\n')),
url: location.href,
title: document.title
};
Expand Down
125 changes: 125 additions & 0 deletions tests/panel/extract-text.test.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/**
* Tests for shadow-DOM text extraction helpers.
*
* collectShadowText is the critical new surface introduced to make
* Monica.im and Cline work: both inject their UI via open shadow roots,
* which are invisible to the standard element.innerText path.
*
* We set up minimal DOM globals so that the function can be exercised
* without a full browser environment.
*/

import test from 'node:test';
import assert from 'node:assert/strict';

// ---------------------------------------------------------------------------
// Minimal DOM stubs – set up before importing the module under test so that
// `instanceof HTMLElement` works inside the module's function bodies.
// ---------------------------------------------------------------------------

/**
 * Bare-bones stand-in for the DOM HTMLElement used by these tests.
 * It carries a fixed innerText, an optional shadowRoot, and a `_all` list
 * that querySelectorAll() hands back regardless of the selector.
 */
class HTMLElement {
  _innerText = '';
  shadowRoot = null;
  children = [];
  _all = [];

  constructor(innerText = '') {
    this._innerText = innerText;
  }

  /** Text supplied at construction time. */
  get innerText() {
    return this._innerText;
  }

  /** Returns the pre-seeded descendant list; the selector is ignored. */
  querySelectorAll() {
    return this._all;
  }
}

// Publish the stub globally so `instanceof HTMLElement` inside the module
// under test resolves to this class.
globalThis.HTMLElement = HTMLElement;

/**
 * Builds a fake shadow root around `children`.
 * Its querySelectorAll() returns every child followed by the elements listed
 * (recursively) in each element's `_all`, in pre-order, mimicking a real
 * shadow root's querySelectorAll('*').
 */
function makeShadowRoot(children = []) {
  // Yields `el` and then, depth-first, everything reachable through `_all`.
  function* walk(el) {
    yield el;
    for (const inner of el._all ?? []) {
      yield* walk(inner);
    }
  }

  return {
    children,
    querySelectorAll() {
      const flat = [];
      for (const top of children) {
        flat.push(...walk(top));
      }
      return flat;
    },
  };
}

/**
 * Creates a stub element with the given innerText, optionally attaching a
 * shadow root and the descendant list its querySelectorAll() should return.
 */
function makeEl(innerText = '', options = {}) {
  const { shadowRoot = null, all = [] } = options;
  return Object.assign(new HTMLElement(innerText), { shadowRoot, _all: all });
}

// ---------------------------------------------------------------------------
// Dynamic import AFTER globals are in place.
// ---------------------------------------------------------------------------
const { collectShadowText } = await import('../../content/extract-text.js');

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

// With no shadow hosts among the descendants there is nothing to collect.
test('collectShadowText returns empty string when there are no shadow roots', () => {
  const plain = makeEl('regular text');
  const collected = collectShadowText(plain);
  assert.equal(collected, '');
});

test('collectShadowText collects text from a single shadow root', () => {
  // Host whose shadow root holds one text-bearing child.
  const host = makeEl('host text', {
    shadowRoot: makeShadowRoot([makeEl('shadow content')]),
    all: [],
  });
  // The container exposes the host through querySelectorAll.
  const container = makeEl('', { all: [host] });

  const text = collectShadowText(container);
  assert.ok(text.includes('shadow content'), `Expected shadow text, got: ${text}`);
});

test('collectShadowText ignores shadow root children with blank innerText', () => {
  // The only shadow child carries whitespace, which must not be collected.
  const whitespaceOnly = makeEl(' ');
  const host = makeEl('', { shadowRoot: makeShadowRoot([whitespaceOnly]) });
  const container = makeEl('', { all: [host] });

  const collected = collectShadowText(container);
  assert.equal(collected.trim(), '');
});

test('collectShadowText recurses into nested shadow roots', () => {
  // Innermost level: a host whose shadow root holds the deep text.
  const deepHost = makeEl('', {
    shadowRoot: makeShadowRoot([makeEl('deeply nested text')]),
    all: [],
  });

  // Outer level: a shadow child that both has text and contains the deep host.
  const outerChild = makeEl('outer shadow text', { all: [deepHost] });
  const topHost = makeEl('', {
    shadowRoot: makeShadowRoot([outerChild]),
    all: [],
  });

  const container = makeEl('', { all: [topHost] });

  const text = collectShadowText(container);
  assert.ok(text.includes('outer shadow text'), `Missing outer text, got: ${text}`);
  assert.ok(text.includes('deeply nested text'), `Missing nested text, got: ${text}`);
});

test('collectShadowText collects from multiple independent shadow roots', () => {
  // Two sibling hosts, each with its own shadow root and a single text child.
  const hosts = ['monica text', 'cline text'].map((label) =>
    makeEl('', { shadowRoot: makeShadowRoot([makeEl(label)]) }),
  );
  const container = makeEl('', { all: hosts });

  const text = collectShadowText(container);
  assert.ok(text.includes('monica text'), `Missing monica text, got: ${text}`);
  assert.ok(text.includes('cline text'), `Missing cline text, got: ${text}`);
});