Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 36 additions & 10 deletions pkg/driver/browser/cdp/commands.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,29 +155,55 @@ func (d *Driver) dispatchCrossRoot(elem *rod.Element, info *core.ElementInfo, de
return errorResult(err, fmt.Sprintf("Failed to dispatch input for %s", desc))
}

// Step 4: poll hit-target verify.
// Step 4: poll for verify result. Chromium delivers trusted events
// synchronously during Mouse.Click, so the first poll is usually
// decisive. Brief retry window absorbs scheduler jitter on slower
// machines / CI.
//
// pollHitTargetResult always returns an object with a `status` field:
// { status: 'done' }
// { status: 'pending', inFlutter: bool }
// { status: 'failed', hitTargetDescription: string }
inFlutter := false
for i := 0; i < 5; i++ {
pollRes, pollErr := d.page.Eval(`(t) => window.__maestro.pollHitTargetResult(t)`, token)
if pollErr != nil {
return errorResult(pollErr, fmt.Sprintf("Failed to poll hit-target result for %s", desc))
}
v := pollRes.Value
if v.Has("hitTargetDescription") {
hd := v.Get("hitTargetDescription").Str()
return errorResult(
fmt.Errorf("input did not reach %s — landed on %s", desc, hd),
fmt.Sprintf("Input on %s did not reach the target (landed on %s)", desc, hd))
}
switch v.Str() {
switch v.Get("status").Str() {
case "pending":
if v.Get("inFlutter").Bool() {
inFlutter = true
}
time.Sleep(20 * time.Millisecond)
continue
case "done":
return successResult(fmt.Sprintf("%s on %s", verbed, desc), info)
default:
return successResult(fmt.Sprintf("%s on %s", verbed, desc), info)
case "failed":
hd := v.Get("hitTargetDescription").Str()
return errorResult(
fmt.Errorf("input did not reach %s — landed on %s", desc, hd),
fmt.Sprintf("Input on %s did not reach the target (landed on %s)", desc, hd))
}
}
// Flutter Web concession (post-click): the trusted event verifier never
// captured a pointerdown/mousedown on the target frame's window. For
// Flutter targets this is the expected steady state — Flutter's pointer
// router sits at the document/flutter-view capture layer and routes the
// trusted event to its own internal hit testing for semantics dispatch;
// it generally does not bubble back out as a window-level
// pointerdown/mousedown that a third-party listener can observe. Pre-
// flight expectHitTarget already validated the static hit point and
// applied the same Flutter concession (jshelper.js:expectHitTarget). So
// when we got past pre-flight and the target lives in Flutter, accept
// the dispatch — Chromium delivered a trusted click at the target's
// coordinates and Flutter handled it. Living in dispatchCrossRoot means
// doubleTapOn / longPressOn / scrollUntilVisible inherit the concession
// for free, since they share this dispatch path.
if inFlutter {
return successResult(fmt.Sprintf("%s on %s", verbed, desc), info)
}
return errorResult(
fmt.Errorf("hit-target verify did not capture trusted event within timeout"),
fmt.Sprintf("Input on %s dispatched but verification timed out", desc))
Expand Down
31 changes: 20 additions & 11 deletions pkg/driver/browser/cdp/finder.go
Original file line number Diff line number Diff line change
Expand Up @@ -511,14 +511,19 @@ func (d *Driver) findByAXTree(text, role string, sel flow.Selector) (*rod.Elemen

// findBySearch uses Rod's page.Search() which handles Shadow DOM via DOMPerformSearch.
//
// Reject IFRAME results: CDP DOM.performSearch matches against the iframe
// element's serialized attributes, including srcdoc — for an iframe whose
// srcdoc HTML happens to contain the search text, the iframe element itself
// is returned. That is essentially never what the caller wants (the caller
// is looking for a tappable text element, not the iframe wrapper). Falling
// through here lets the cascade reach the JS findByText path, which walks
// _collectRoots() into the iframe document and shadow DOM and returns the
// actual visible match. (Issues #71/#72 acting layer.)
// Reject non-tappable text containers: CDP DOM.performSearch matches against
// the serialized HTML of every node, including the source text of <script>
// and <style> blocks and the srcdoc attribute of <iframe>. For a page whose
// JS source happens to contain "Close" (a button label coincidentally also
// found in the source code), the <script> element itself is returned — it's
// never visible, never tappable, and any subsequent click translates to
// nonsense coordinates that the hit-target verifier then reports as occluded
// by whatever real content sits at the page origin. Same shape for <style>,
// <template>, and other inert text containers; same shape for <iframe> when
// its srcdoc contains the search text. Falling through here lets the cascade
// reach the JS findByText path, which walks _collectRoots() into the iframe
// document and shadow DOM and returns the actual visible match. (Issues
// #71/#72 acting layer.)
func (d *Driver) findBySearch(text string, sel flow.Selector) (*rod.Element, *core.ElementInfo, error) {
p := d.page.Timeout(2 * time.Second)
res, err := p.Search(text)
Expand All @@ -532,9 +537,13 @@ func (d *Driver) findBySearch(text string, sel flow.Selector) (*rod.Element, *co
}

elem := res.First
if tag, _ := elem.Eval(`() => this.tagName`); tag != nil &&
(tag.Value.Str() == "IFRAME" || tag.Value.Str() == "FRAME") {
return nil, nil, fmt.Errorf("search returned iframe element (likely srcdoc text match) — falling through to JS findByText")
if tag, _ := elem.Eval(`() => this.tagName`); tag != nil {
switch tag.Value.Str() {
case "IFRAME", "FRAME":
return nil, nil, fmt.Errorf("search returned iframe element (likely srcdoc text match) — falling through to JS findByText")
case "SCRIPT", "STYLE", "TEMPLATE", "NOSCRIPT", "TITLE", "META", "LINK", "HEAD":
return nil, nil, fmt.Errorf("search returned non-tappable %s element (text matched in source/inert content) — falling through to JS findByText", strings.ToLower(tag.Value.Str()))
}
}
if !d.matchesStateFilters(elem, sel) {
return nil, nil, fmt.Errorf("search found element but state filters don't match")
Expand Down
70 changes: 60 additions & 10 deletions pkg/driver/browser/cdp/jshelper.js
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,25 @@ window.__maestro = {
return null;
},

// _isInFlutterContext: walks up (piercing shadow boundaries) and returns
// true if the node lives inside a Flutter Web app — i.e. has any ancestor
// with a Flutter-namespaced tag (`flutter-view` or `flt-*`). Real Flutter
// Web DOMs vary by embedder/version: in some <flutter-view> wraps both the
// canvas and <flt-semantics-host>; in others <flt-semantics-host> is a
// SIBLING of <flutter-view> (semantics tree rendered separately for screen
// readers). Either layout is "inside Flutter" for hit-target purposes —
// Flutter's pointer router intercepts events through its glass pane and
// dispatches to semantics via its own internal hit testing.
_isInFlutterContext: function(node) {
var n = node;
while (n) {
var tag = n.tagName ? n.tagName.toLowerCase() : '';
if (tag === 'flutter-view' || tag.indexOf('flt-') === 0) return true;
n = this._parentElementOrShadowHost(n);
}
return false;
},

// _isInIframe: cheap check used by Go to decide whether to use the new
// coord-translated dispatch path. True iff the element's owner document has
// a non-null frameElement (i.e. it lives inside an iframe at any depth).
Expand Down Expand Up @@ -624,7 +643,24 @@ window.__maestro = {
hitElement = hitElement.assignedSlot || this._parentElementOrShadowHost(hitElement);
}
if (hitElement === targetElement) return 'done';
var description = this._previewNode(hitParents[0] || document.documentElement);
// Flutter Web concession: <flutter-view> (and the legacy <flt-glass-pane>)
// acts as a glass pane that intercepts every pointer event and routes it
// to the appropriate <flt-semantics> action via Flutter's internal hit
// testing. The accessibility tree may live in light DOM under
// <flutter-view>, in its shadow root, or as a sibling <flt-semantics-host>
// with the rendering canvas stacked above it. In all of these layouts,
// elementsFromPoint at a semantics target returns the canvas / glass pane,
// never the semantics node, so a strict same-element walk-up would always
// report false occlusion and refuse to dispatch. Accept when the target
// and the topmost hit element are both inside a Flutter context and share
// the same owner document; Flutter will route the trusted click to the
// right semantics action.
var topHit = hitParents[0];
if (topHit && this._isInFlutterContext(targetElement) &&
this._isInFlutterContext(topHit) &&
targetElement.ownerDocument === topHit.ownerDocument) {
return 'done';
}
var description = this._previewNode(topHit || document.documentElement);
return { hitTargetDescription: description };
},

Expand Down Expand Up @@ -694,7 +730,11 @@ window.__maestro = {
var token = this._hitTargetNextToken++;
var win = targetElement.ownerDocument && targetElement.ownerDocument.defaultView;
if (!win) return { error: '<target window unavailable>' };
var state = { captured: false, result: undefined, target: targetElement, win: win };
var inFlutter = this._isInFlutterContext(targetElement);
var state = {
captured: false, result: undefined, target: targetElement, win: win,
inFlutter: inFlutter
};
this._hitTargetState[token] = state;

var listener = function(ev) {
Expand Down Expand Up @@ -722,17 +762,21 @@ window.__maestro = {
return { token: token };
},

// pollHitTargetResult: returns the captured verify outcome for a token, or
// 'pending' if the listener hasn't fired yet. Caller should poll a few
// times after dispatch (a real trusted pointerdown is delivered
// synchronously to JS during Mouse.Click, so 'pending' should be rare —
// but a brief retry window absorbs scheduler jitter).
// pollHitTargetResult: returns the captured verify outcome for a token.
// Caller should poll a few times after dispatch (a real trusted pointerdown
// is delivered synchronously to JS during Mouse.Click, so 'pending' should
// be rare — but a brief retry window absorbs scheduler jitter).
//
// Always returns an object with a `status` field:
// { status: 'done' } → success
// { status: 'pending', inFlutter: bool } → trusted event not seen yet
// { status: 'failed', hitTargetDescription: string } → landed on the wrong element
//
// After returning a non-'pending' result, the token is cleaned up.
pollHitTargetResult: function(token) {
var state = this._hitTargetState[token];
if (!state) return 'done'; // unknown token → don't block caller
if (!state.captured) return 'pending';
if (!state) return { status: 'done' }; // unknown token → don't block caller
if (!state.captured) return { status: 'pending', inFlutter: !!state.inFlutter };
var r = state.result;
// Cleanup listener if still attached (defensive — listener removes itself
// on capture, but if the click never fired we want to stop leaking).
Expand All @@ -741,7 +785,13 @@ window.__maestro = {
state.win.removeEventListener('mousedown', state.listener, true);
} catch (e) {}
delete this._hitTargetState[token];
return r;
// Normalize the listener-stashed expectHitTarget result into the unified
// shape. expectHitTarget returns 'done' or { hitTargetDescription }.
if (r === 'done') return { status: 'done' };
if (r && typeof r === 'object' && 'hitTargetDescription' in r) {
return { status: 'failed', hitTargetDescription: r.hitTargetDescription };
}
return { status: 'done' }; // defensive fallback
},

// disposeHitTargetInterceptor: called by Go to abandon a token without
Expand Down
Loading