From d140e7d321ff7e5d2f55a27927531c74d8b4f94e Mon Sep 17 00:00:00 2001
From: y21 <30553356+y21@users.noreply.github.com>
Date: Sun, 2 Mar 2025 00:38:25 +0100
Subject: [PATCH 1/5] rework regex engine to better understand backtracking

---
 crates/dash_compiler/src/instruction.rs     |   9 +-
 crates/dash_compiler/src/lib.rs             |   2 +-
 crates/dash_decompiler/src/decompiler.rs    |   2 +-
 crates/dash_middle/src/compiler/constant.rs |   6 +-
 crates/dash_middle/src/parser/expr.rs       |   8 +-
 crates/dash_parser/src/expr.rs              |  14 +-
 crates/dash_regex/src/error.rs              |   3 +
 crates/dash_regex/src/flags.rs              |   3 +-
 crates/dash_regex/src/graph/build.rs        | 160 +++++++++++++
 crates/dash_regex/src/graph/eval.rs         | 246 ++++++++++++++++++++
 crates/dash_regex/src/graph/mod.rs          |  52 +++++
 crates/dash_regex/src/graph/node.rs         |  93 ++++++++
 crates/dash_regex/src/lib.rs                |  94 +++++---
 crates/dash_regex/src/node.rs               |  24 +-
 crates/dash_regex/src/parser.rs             |  16 +-
 crates/dash_vm/src/dispatch.rs              |   4 +-
 crates/dash_vm/src/gc/trace.rs              |   4 +-
 crates/dash_vm/src/js_std/regex.rs          |  93 ++++----
 crates/dash_vm/src/value/regex.rs           |  12 +-
 19 files changed, 700 insertions(+), 145 deletions(-)
 create mode 100644 crates/dash_regex/src/graph/build.rs
 create mode 100644 crates/dash_regex/src/graph/eval.rs
 create mode 100644 crates/dash_regex/src/graph/mod.rs
 create mode 100644 crates/dash_regex/src/graph/node.rs

diff --git a/crates/dash_compiler/src/instruction.rs b/crates/dash_compiler/src/instruction.rs
index 6821e6b6..62e8b2ef 100755
--- a/crates/dash_compiler/src/instruction.rs
+++ b/crates/dash_compiler/src/instruction.rs
@@ -133,13 +133,8 @@ impl InstructionBuilder<'_, '_> {
         Ok(())
     }
 
-    pub fn build_regex_constant(
-        &mut self,
-        regex: dash_regex::ParsedRegex,
-        flags: dash_regex::Flags,
-        sym: Symbol,
-    ) -> Result<(), LimitExceededError> {
-        let RegexConstant(id) = self.current_function_mut().cp.add_regex((regex, flags, sym))?;
+    pub fn build_regex_constant(&mut self, regex: dash_regex::Regex, sym: Symbol) -> Result<(), LimitExceededError> {
+        let RegexConstant(id) = self.current_function_mut().cp.add_regex((regex, sym))?;
         self.write_instr(Instruction::Regex);
         self.writew(id);
         Ok(())
diff --git a/crates/dash_compiler/src/lib.rs b/crates/dash_compiler/src/lib.rs
index b55bd7dd..60999611 100644
--- a/crates/dash_compiler/src/lib.rs
+++ b/crates/dash_compiler/src/lib.rs
@@ -606,7 +606,7 @@ impl Visitor<Result<(), Error>> for FunctionCompiler<'_> {
             LiteralExpr::Number(n) => ib.build_number_constant(n),
             LiteralExpr::String(s) => ib.build_string_constant(s),
             LiteralExpr::Identifier(_) => unreachable!("identifiers are handled in visit_identifier_expression"),
-            LiteralExpr::Regex(regex, flags, sym) => ib.build_regex_constant(regex, flags, sym),
+            LiteralExpr::Regex(regex, sym) => ib.build_regex_constant(regex, sym),
             LiteralExpr::Null => ib.build_null_constant(),
             LiteralExpr::Undefined => ib.build_undefined_constant(),
         };
diff --git a/crates/dash_decompiler/src/decompiler.rs b/crates/dash_decompiler/src/decompiler.rs
index 93de4195..9c544fd8 100644
--- a/crates/dash_decompiler/src/decompiler.rs
+++ b/crates/dash_decompiler/src/decompiler.rs
@@ -147,7 +147,7 @@ impl<'interner, 'buf> FunctionDecompiler<'interner, 'buf> {
                         ),
                         Instruction::Regex => (
                             "regex",
-                            &self.interner.resolve(self.constants.regexes[RegexConstant(id)].2) as &dyn fmt::Display,
+                            &self.interner.resolve(self.constants.regexes[RegexConstant(id)].1) as &dyn fmt::Display,
                         ),
                         _ => unreachable!(),
                     };
diff --git a/crates/dash_middle/src/compiler/constant.rs b/crates/dash_middle/src/compiler/constant.rs
index 9240bc07..3edca5ab 100755
--- a/crates/dash_middle/src/compiler/constant.rs
+++ b/crates/dash_middle/src/compiler/constant.rs
@@ -2,7 +2,7 @@ use core::fmt;
 use std::cell::Cell;
 use std::rc::Rc;
 
-use dash_regex::{Flags, ParsedRegex};
+use dash_regex::Regex;
 
 use crate::index_type;
 use crate::indexvec::IndexThinVec;
@@ -96,7 +96,7 @@ pub struct ConstantPool {
     pub symbols: IndexThinVec<Symbol, SymbolConstant>,
     pub booleans: IndexThinVec<bool, BooleanConstant>,
     pub functions: IndexThinVec<Rc<Function>, FunctionConstant>,
-    pub regexes: IndexThinVec<(ParsedRegex, Flags, Symbol), RegexConstant>,
+    pub regexes: IndexThinVec<(Regex, Symbol), RegexConstant>,
 }
 
 pub struct LimitExceededError;
@@ -120,6 +120,6 @@ impl ConstantPool {
         add_symbol(symbols, Symbol) -> SymbolConstant,
         add_boolean(booleans, bool) -> BooleanConstant,
         add_function(functions, Rc<Function>) -> FunctionConstant,
-        add_regex(regexes, (ParsedRegex, Flags, Symbol)) -> RegexConstant
+        add_regex(regexes, (Regex, Symbol)) -> RegexConstant
     );
 }
diff --git a/crates/dash_middle/src/parser/expr.rs b/crates/dash_middle/src/parser/expr.rs
index f3e5fa8a..2c4815d8 100644
--- a/crates/dash_middle/src/parser/expr.rs
+++ b/crates/dash_middle/src/parser/expr.rs
@@ -202,8 +202,8 @@ impl ExprKind {
         Self::Literal(LiteralExpr::Undefined)
     }
 
-    pub fn regex_literal(regex: dash_regex::ParsedRegex, flags: dash_regex::Flags, source: Symbol) -> Self {
-        Self::Literal(LiteralExpr::Regex(regex, flags, source))
+    pub fn regex_literal(regex: dash_regex::Regex, source: Symbol) -> Self {
+        Self::Literal(LiteralExpr::Regex(regex, source))
     }
 
     /// Creates a function call expression
@@ -551,8 +551,8 @@ pub enum LiteralExpr {
     #[display(fmt = "\"{_0}\"")]
     String(Symbol),
 
-    #[display(fmt = "/{_2}/")]
-    Regex(dash_regex::ParsedRegex, dash_regex::Flags, Symbol),
+    #[display(fmt = "/{_1}/")]
+    Regex(dash_regex::Regex, Symbol),
 
     #[display(fmt = "null")]
     Null,
diff --git a/crates/dash_parser/src/expr.rs b/crates/dash_parser/src/expr.rs
index 64740d57..20857993 100644
--- a/crates/dash_parser/src/expr.rs
+++ b/crates/dash_parser/src/expr.rs
@@ -10,7 +10,6 @@ use dash_middle::parser::statement::{
     StatementKind,
 };
 use dash_middle::sourcemap::Span;
-use dash_regex::Flags;
 
 use crate::{Parser, any};
 
@@ -874,14 +873,9 @@ impl Parser<'_, '_> {
                 // Trim / prefix and suffix
                 let full = self.interner.resolve(literal);
                 let full = &full[1..full.len() - 1];
-                let (nodes, flags) = match dash_regex::Parser::new(full.as_bytes()).parse_all().and_then(|node| {
-                    self.interner
-                        .resolve(flags)
-                        .parse::<Flags>()
-                        .map_err(Into::into)
-                        .map(|flags| (node, flags))
-                }) {
-                    Ok((nodes, flags)) => (nodes, flags),
+                let flags = self.interner.resolve(flags);
+                let regex = match dash_regex::compile(full, flags) {
+                    Ok(regex) => regex,
                     Err(err) => {
                         let tok = *self.previous().unwrap();
                         self.error(Error::RegexSyntaxError(tok, err));
@@ -890,7 +884,7 @@ impl Parser<'_, '_> {
                 };
                 Expr {
                     span: current.span,
-                    kind: ExprKind::regex_literal(nodes, flags, literal),
+                    kind: ExprKind::regex_literal(regex, literal),
                 }
             }
             other if other.is_identifier() => {
diff --git a/crates/dash_regex/src/error.rs b/crates/dash_regex/src/error.rs
index 1a3d127b..252be1e0 100644
--- a/crates/dash_regex/src/error.rs
+++ b/crates/dash_regex/src/error.rs
@@ -10,6 +10,9 @@ pub enum Error {
     #[error("unexpected character: {}", *.0 as char)]
     UnexpectedChar(u8),
 
+    #[error("number too large to fit in a u32")]
+    Overflow,
+
     #[error("{0}")]
     Flags(#[from] flags::Error),
 }
diff --git a/crates/dash_regex/src/flags.rs b/crates/dash_regex/src/flags.rs
index 6b17cad2..eef465d3 100644
--- a/crates/dash_regex/src/flags.rs
+++ b/crates/dash_regex/src/flags.rs
@@ -1,12 +1,11 @@
 use std::str::FromStr;
 
 use bitflags::bitflags;
-use serde::{Deserialize, Serialize};
 use thiserror::Error;
 
 bitflags! {
     #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
-    #[cfg_attr(feature = "format", derive(Serialize, Deserialize))]
+    #[cfg_attr(feature = "format", derive(serde::Serialize, serde::Deserialize))]
     pub struct Flags: u8 {
         const GLOBAL = 1;
         const IGNORE_CASE = 2;
diff --git a/crates/dash_regex/src/graph/build.rs b/crates/dash_regex/src/graph/build.rs
new file mode 100644
index 00000000..59dd2fce
--- /dev/null
+++ b/crates/dash_regex/src/graph/build.rs
@@ -0,0 +1,160 @@
+use core::slice;
+use std::collections::HashMap;
+
+use crate::graph::node::{BuildGraph, CharacterClassItem, Node, NodeId, NodeKind};
+use crate::node::{CharacterClassItem as ParsedCharacterClassItem, GroupCaptureMode};
+
+use crate::node::Node as ParseNode;
+use crate::parser::ParsedRegex;
+
+use super::node::Graph;
+
+type CaptureGroupMap = HashMap<*const ParseNode, u32>;
+
+/// Maps every capturing group node in `regex` to its group id, keyed by the node's address.
+///
+/// The keys are never dereferenced; lookups only work while `regex` stays where it is.
+pub fn number_groups(regex: &ParsedRegex) -> CaptureGroupMap {
+    fn inner(map: &mut CaptureGroupMap, nodes: &[ParseNode]) {
+        for node in nodes {
+            match node {
+                ParseNode::Group(id, nodes) => {
+                    if let GroupCaptureMode::Id(id) = *id {
+                        map.insert(node, id.try_into().expect("capture group id does not fit in a u32"));
+                    }
+
+                    inner(map, nodes);
+                }
+                ParseNode::Optional(node) => inner(map, slice::from_ref(&**node)),
+                ParseNode::Or(left, right) => {
+                    inner(map, left);
+                    inner(map, right);
+                }
+                ParseNode::Repetition { node, .. } => inner(map, slice::from_ref(&**node)),
+                ParseNode::AnyCharacter
+                | ParseNode::MetaSequence(_)
+                | ParseNode::LiteralCharacter(_)
+                | ParseNode::CharacterClass(_)
+                | ParseNode::Anchor(_) => {} // cannot contain group nodes
+            }
+        }
+    }
+
+    let mut map = HashMap::new();
+    inner(&mut map, &regex.nodes);
+    map
+}
+
+pub fn build(group_numbers: &CaptureGroupMap, regex: &ParsedRegex) -> (Graph, Option<NodeId>) {
+    fn lower_repetition(
+        graph: &mut BuildGraph,
+        group_numbers: &CaptureGroupMap,
+        node: &ParseNode,
+        min: u32,
+        max: Option<u32>,
+        next: Option<NodeId>,
+    ) -> NodeId {
+        let end_id = graph.push(Node {
+            next,
+            kind: NodeKind::RepetitionEnd {
+                start: NodeId::DUMMY, // will be set later
+            },
+        });
+        let inner_id = inner(graph, group_numbers, slice::from_ref(node), Some(end_id)).unwrap();
+        let start_id = graph.push(Node {
+            next,
+            kind: NodeKind::RepetitionStart {
+                min,
+                max,
+                inner: inner_id,
+            },
+        });
+        let NodeKind::RepetitionEnd { start } = &mut graph[end_id].kind else {
+            unreachable!()
+        };
+        *start = start_id;
+        start_id
+    }
+
+    fn inner(
+        graph: &mut BuildGraph,
+        group_numbers: &CaptureGroupMap,
+        nodes: &[ParseNode],
+        outer_next: Option<NodeId>,
+    ) -> Option<NodeId> {
+        if let Some((current, rest)) = nodes.split_first() {
+            let next = inner(graph, group_numbers, rest, outer_next);
+            match *current {
+                ParseNode::AnyCharacter => Some(graph.push(Node {
+                    next,
+                    kind: NodeKind::AnyCharacter,
+                })),
+                ParseNode::MetaSequence(meta) => Some(graph.push(Node {
+                    next,
+                    kind: NodeKind::Meta(meta),
+                })),
+                ParseNode::Repetition { ref node, min, max } => {
+                    Some(lower_repetition(graph, group_numbers, node, min, max, next))
+                }
+                ParseNode::LiteralCharacter(literal) => Some(graph.push(Node {
+                    next,
+                    kind: NodeKind::Literal(literal),
+                })),
+                ParseNode::CharacterClass(ref parse_items) => {
+                    let items = parse_items
+                        .iter()
+                        .map(|item| match *item {
+                            ParsedCharacterClassItem::Node(ParseNode::AnyCharacter) => CharacterClassItem::AnyCharacter,
+                            ParsedCharacterClassItem::Node(ParseNode::LiteralCharacter(literal)) => {
+                                CharacterClassItem::Literal(literal)
+                            }
+                            ParsedCharacterClassItem::Node(ParseNode::MetaSequence(meta)) => {
+                                CharacterClassItem::Meta(meta)
+                            }
+                            ParsedCharacterClassItem::Node(ref node) => {
+                                panic!("cannot lower {node:?} in character class")
+                            }
+                            ParsedCharacterClassItem::Range(from, to) => CharacterClassItem::Range(from, to),
+                        })
+                        .collect::<Box<[_]>>();
+
+                    Some(graph.push(Node {
+                        next,
+                        kind: NodeKind::CharacterClass(items),
+                    }))
+                }
+                ParseNode::Anchor(anchor) => Some(graph.push(Node {
+                    next,
+                    kind: NodeKind::Anchor(anchor),
+                })),
+                ParseNode::Or(ref left, ref right) => {
+                    let left = inner(graph, group_numbers, left, next).unwrap();
+                    let right = inner(graph, group_numbers, right, next).unwrap();
+                    Some(graph.push(Node {
+                        next,
+                        kind: NodeKind::Or(left, right),
+                    }))
+                }
+                ParseNode::Optional(ref node) => Some(lower_repetition(graph, group_numbers, node, 0, Some(1), next)),
+                ParseNode::Group(_, ref nodes) => {
+                    let group_id = group_numbers.get(&(current as *const ParseNode)).copied();
+                    let end = graph.push(Node {
+                        next,
+                        kind: NodeKind::GroupEnd { group_id },
+                    });
+                    let inner_id = inner(graph, group_numbers, nodes, Some(end)).unwrap();
+                    Some(graph.push(Node {
+                        next: Some(inner_id),
+                        kind: NodeKind::GroupStart { group_id },
+                    }))
+                }
+            }
+        } else {
+            outer_next
+        }
+    }
+
+    let mut graph = BuildGraph::new();
+    let root = inner(&mut graph, group_numbers, &regex.nodes, None);
+    (graph.finalize(), root)
+}
diff --git a/crates/dash_regex/src/graph/eval.rs b/crates/dash_regex/src/graph/eval.rs
new file mode 100644
index 00000000..8867c3dc
--- /dev/null
+++ b/crates/dash_regex/src/graph/eval.rs
@@ -0,0 +1,246 @@
+use crate::graph::node::CharacterClassItem;
+use crate::node::Anchor;
+
+use super::Regex;
+use super::node::{Graph, NodeId, NodeKind};
+
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub enum ProcessedGroupState {
+    Confirmed,
+    Unconfirmed,
+}
+
+struct Cx<'a> {
+    processed_groups: &'a mut [Option<(u32, u32, ProcessedGroupState)>],
+    pending_groups: &'a mut [(Option<u32>, Option<u32>)],
+    /// The full input source of this "attempt".
+    full_input: &'a [u8],
+    graph: &'a Graph,
+    /// The offset of `full_input` in the *original* input string.
+    offset_from_original: u32,
+    current_repetition_count: Option<u32>,
+}
+
+impl Cx<'_> {
+    /// Returns the offset of the passed in slice relative to the full input.
+    /// The slice must actually be obtained from the full input for the return value to make sense.
+    /// The value is unspecified (but not undefined) if passed an input slice from somewhere else.
+    pub fn offset(&self, s: &[u8]) -> u32 {
+        (s.as_ptr().addr() - self.full_input.as_ptr().addr()) as u32
+    }
+
+    /// Same as `offset`, but returns it relative to the original input.
+    pub fn offset_from_original(&self, s: &[u8]) -> u32 {
+        self.offset_from_original + self.offset(s)
+    }
+
+    /// Creates a new context usable for the specified node.
+    pub fn for_node(&mut self, node: NodeId, origin: NodeId) -> Cx<'_> {
+        let Self {
+            processed_groups: &mut ref mut processed_groups,
+            pending_groups: &mut ref mut pending_groups,
+            full_input,
+            graph,
+            offset_from_original,
+            mut current_repetition_count,
+        } = *self;
+
+        if let NodeKind::RepetitionStart { .. } = graph[node].kind {
+            if let NodeKind::RepetitionEnd { .. } = graph[origin].kind {
+                current_repetition_count = Some(current_repetition_count.unwrap() + 1);
+            } else {
+                current_repetition_count = Some(0);
+            }
+        }
+
+        Cx {
+            processed_groups,
+            pending_groups,
+            full_input,
+            graph,
+            offset_from_original,
+            current_repetition_count,
+        }
+    }
+}
+
+fn step(mut cx: Cx, node_id: NodeId, mut input: &[u8]) -> bool {
+    // Matches `node_id` and everything reachable through `next` against `input`; returns true on success.
+    // `cx` is shadowed by a borrow so that sub-nodes can only be evaluated with a fresh context from `Cx::for_node`.
+    let cx = &mut cx;
+    let node = &cx.graph[node_id];
+
+    let mut matches = match node.kind {
+        NodeKind::AnyCharacter => {
+            if let Some(rest) = input.get(1..) {
+                input = rest;
+                true
+            } else {
+                false
+            }
+        }
+        NodeKind::RepetitionStart { min, max, inner } => 'arm: {
+            let current_repetition_count = cx.current_repetition_count.unwrap();
+
+            if let Some(max) = max {
+                if current_repetition_count >= max {
+                    // We've done `max` number of iterations.
+                    break 'arm true;
+                }
+            }
+
+            if step(cx.for_node(inner, node_id), inner, input) {
+                // This has automatically also checked the rest input. Don't need to do that again here after the match.
+                return true;
+            }
+            current_repetition_count >= min
+        }
+        NodeKind::Anchor(Anchor::StartOfString) => cx.offset_from_original(input) == 0, // not just this attempt's start
+        NodeKind::Anchor(Anchor::EndOfString) => input.is_empty(),
+        NodeKind::Meta(meta) => {
+            if let Some((_, rest)) = input.split_first().filter(|&(&c, _)| meta.matches(c)) {
+                input = rest;
+                true
+            } else {
+                false
+            }
+        }
+        NodeKind::CharacterClass(ref items) => {
+            if let Some((_, rest)) = input.split_first().filter(|&(&c, _)| {
+                items.iter().copied().any(|item| match item {
+                    CharacterClassItem::Literal(lit) => lit == c,
+                    CharacterClassItem::AnyCharacter => true,
+                    CharacterClassItem::Meta(meta) => meta.matches(c),
+                    CharacterClassItem::Range(min, max) => (min..=max).contains(&c),
+                })
+            }) {
+                input = rest;
+                true
+            } else {
+                false
+            }
+        }
+        NodeKind::Literal(lit) => {
+            if let Some((_, rest)) = input.split_first().filter(|&(&c, _)| c == lit) {
+                input = rest;
+                true
+            } else {
+                false
+            }
+        }
+        NodeKind::Or(left, right) => {
+            return step(cx.for_node(left, node_id), left, input) || step(cx.for_node(right, node_id), right, input);
+        }
+        NodeKind::RepetitionEnd { start } => {
+            return step(cx.for_node(start, node_id), start, input);
+        }
+        NodeKind::GroupStart { group_id } => {
+            if let Some(group_id) = group_id {
+                let offset = cx.offset_from_original(input);
+                cx.pending_groups[group_id as usize] = (Some(offset), None);
+            }
+            true
+        }
+        NodeKind::GroupEnd { group_id } => {
+            if let Some(group_id) = group_id {
+                let group_id = group_id as usize;
+
+                let old = cx.processed_groups[group_id];
+                let start = cx.pending_groups[group_id].0.unwrap();
+                let end = cx.offset_from_original(input);
+                cx.processed_groups[group_id] = Some((start, end, ProcessedGroupState::Unconfirmed));
+
+                return if let Some(next) = node.next {
+                    let matches = step(cx.for_node(next, node_id), next, input);
+                    cx.pending_groups[group_id] = (Some(start), Some(end));
+
+                    if matches {
+                        if cx.processed_groups[group_id].is_none_or(|(.., s)| s == ProcessedGroupState::Unconfirmed) {
+                            // This group may have been processed again in a subsequent iteration.
+                            // Only overwrite it back with this iteration's if it's still unconfirmed
+                            cx.processed_groups[group_id] = Some((start, end, ProcessedGroupState::Confirmed));
+                        }
+
+                        true
+                    } else {
+                        // We did not match. Restore to old.
+                        if let Some((a, b, _)) = old {
+                            cx.processed_groups[group_id] = Some((a, b, ProcessedGroupState::Unconfirmed));
+                        } else {
+                            cx.processed_groups[group_id] = None;
+                        }
+                        false
+                    }
+                } else {
+                    // No next node.
+                    cx.processed_groups[group_id].as_mut().unwrap().2 = ProcessedGroupState::Confirmed;
+                    true
+                };
+            }
+
+            true
+        }
+    };
+
+    if let Some(next) = node.next {
+        matches = matches && step(cx.for_node(next, node_id), next, input);
+    }
+    matches
+}
+
+#[derive(Debug)]
+pub struct EvalSuccess {
+    pub groups: Box<[Option<(u32, u32, ProcessedGroupState)>]>,
+}
+
+#[derive(Debug)]
+pub struct NoMatch;
+
+pub fn eval(regex: &Regex, mut input: &[u8]) -> Result<EvalSuccess, NoMatch> {
+    let Some(root) = regex.root else {
+        // Nothing to do for empty regexes.
+        return Ok(EvalSuccess { groups: Box::default() });
+    };
+
+    let mut processed_groups = vec![None; regex.group_count as usize].into_boxed_slice();
+    let mut pending_groups = vec![(None, None); regex.group_count as usize].into_boxed_slice();
+    let mut offset_from_original = 0;
+    loop {
+        // TODO: add a fast reject path where we find the first required character and seek to it in `input`
+        processed_groups[0] = Some((
+            offset_from_original,
+            offset_from_original + input.len() as u32,
+            ProcessedGroupState::Confirmed,
+        ));
+        processed_groups[1..].fill(None);
+        pending_groups.fill((None, None));
+
+        let cx = Cx {
+            processed_groups: &mut processed_groups,
+            pending_groups: &mut pending_groups,
+            current_repetition_count: if let NodeKind::RepetitionStart { .. } = regex.graph[root].kind {
+                Some(0)
+            } else {
+                None
+            },
+            offset_from_original,
+            full_input: input,
+            graph: &regex.graph,
+        };
+
+        if step(cx, root, input) {
+            return Ok(EvalSuccess {
+                groups: processed_groups,
+            });
+        }
+
+        if let Some(rest) = input.get(1..) {
+            offset_from_original += 1;
+            input = rest;
+        } else {
+            break;
+        }
+    }
+
+    Err(NoMatch)
+}
diff --git a/crates/dash_regex/src/graph/mod.rs b/crates/dash_regex/src/graph/mod.rs
new file mode 100644
index 00000000..a0e8b11c
--- /dev/null
+++ b/crates/dash_regex/src/graph/mod.rs
@@ -0,0 +1,52 @@
+mod build;
+pub mod eval;
+pub mod node;
+
+use eval::{EvalSuccess, NoMatch};
+use node::{Graph, NodeId};
+
+use crate::Flags;
+use crate::parser::ParsedRegex;
+
+/// A finalized, compiled regex.
+#[derive(Debug, Clone)]
+#[cfg_attr(feature = "format", derive(serde::Serialize, serde::Deserialize))]
+pub struct Regex {
+    graph: Graph,
+    flags: Flags,
+    root: Option<NodeId>,
+    group_count: u32,
+}
+
+impl Regex {
+    pub fn eval(&self, input: &str) -> Result<EvalSuccess, NoMatch> {
+        eval::eval(self, input.as_bytes())
+    }
+
+    pub fn matches(&self, input: &str) -> bool {
+        self.eval(input).is_ok()
+    }
+
+    pub fn flags(&self) -> Flags {
+        self.flags
+    }
+}
+
+pub fn compile(regex: ParsedRegex, flags: Flags) -> Regex {
+    // `number_groups` hands back a map keyed by the addresses of parse nodes, so the parsed
+    // regex must stay put for as long as that map is consulted. Those keys are never
+    // dereferenced, so a move could not be unsound, but it would still lead to bugs.
+    // Shadowing the owned value with a borrow rules such an accidental move out.
+    let regex = &regex;
+
+    let group_numbers = build::number_groups(regex);
+    let (graph, root) = build::build(&group_numbers, regex);
+
+    Regex {
+        graph,
+        flags,
+        root,
+        // Panics if the parser's group count does not fit in a u32.
+        group_count: u32::try_from(regex.group_count).unwrap(),
+    }
+}
diff --git a/crates/dash_regex/src/graph/node.rs b/crates/dash_regex/src/graph/node.rs
new file mode 100644
index 00000000..07afb359
--- /dev/null
+++ b/crates/dash_regex/src/graph/node.rs
@@ -0,0 +1,93 @@
+use std::ops::{Deref, DerefMut, Index, IndexMut};
+
+use crate::node::{Anchor, MetaSequence};
+
+#[derive(Debug, Clone, Copy)]
+#[cfg_attr(feature = "format", derive(serde::Serialize, serde::Deserialize))]
+pub struct NodeId(u32);
+impl NodeId {
+    pub(super) const DUMMY: NodeId = NodeId(u32::MAX);
+}
+
+#[derive(Debug, Clone)]
+#[cfg_attr(feature = "format", derive(serde::Serialize, serde::Deserialize))]
+pub struct Node {
+    pub next: Option<NodeId>,
+    pub kind: NodeKind,
+}
+
+#[derive(Debug, Clone)]
+#[cfg_attr(feature = "format", derive(serde::Serialize, serde::Deserialize))]
+pub enum NodeKind {
+    AnyCharacter,
+    RepetitionStart {
+        min: u32,
+        max: Option<u32>,
+        /// The node being repeated
+        inner: NodeId,
+    },
+    Anchor(Anchor),
+    Meta(MetaSequence),
+    CharacterClass(Box<[CharacterClassItem]>),
+    Literal(u8),
+    Or(NodeId, NodeId),
+    RepetitionEnd {
+        /// The `RepetitionStart` node to jump to when executing the next repetition iteration
+        start: NodeId,
+    },
+    GroupStart {
+        group_id: Option<u32>,
+    },
+    GroupEnd {
+        group_id: Option<u32>,
+    },
+}
+
+#[derive(Debug, Copy, Clone)]
+#[cfg_attr(feature = "format", derive(serde::Serialize, serde::Deserialize))]
+pub enum CharacterClassItem {
+    Literal(u8),
+    AnyCharacter,
+    Meta(MetaSequence),
+    Range(u8, u8),
+}
+
+pub type BuildGraph = Graph<Vec<Node>>;
+
+#[derive(Debug, Clone)]
+#[cfg_attr(feature = "format", derive(serde::Serialize, serde::Deserialize))]
+pub struct Graph<C = Box<[Node]>> {
+    nodes: C,
+}
+
+impl BuildGraph {
+    pub fn new() -> Self {
+        Self { nodes: Vec::new() }
+    }
+
+    pub fn push(&mut self, node: Node) -> NodeId {
+        let id = u32::try_from(self.nodes.len()).expect("attempted to insert more than 2^32 nodes");
+        self.nodes.push(node);
+        NodeId(id)
+    }
+
+    pub fn finalize(self) -> Graph {
+        Graph {
+            nodes: self.nodes.into_boxed_slice(),
+        }
+    }
+}
+
+// Requires an indirection through the deref trait because `Box<[T]>` does not implement `Index<usize>`...
+impl<C: Deref<Target: Index<usize, Output = Node>>> Index<NodeId> for Graph<C> {
+    type Output = Node;
+    fn index(&self, index: NodeId) -> &Self::Output {
+        &self.nodes[index.0 as usize]
+    }
+}
+
+impl<C: DerefMut<Target: IndexMut<usize, Output = Node>>> IndexMut<NodeId> for Graph<C> {
+    fn index_mut(&mut self, index: NodeId) -> &mut Self::Output {
+        &mut self.nodes[index.0 as usize]
+    }
+}
diff --git a/crates/dash_regex/src/lib.rs b/crates/dash_regex/src/lib.rs
index 4f5d539c..09e9c150 100644
--- a/crates/dash_regex/src/lib.rs
+++ b/crates/dash_regex/src/lib.rs
@@ -1,55 +1,73 @@
+use std::str::FromStr;
+
 pub use error::Error;
-pub use matcher::Matcher;
-pub use node::Node;
-pub use parser::Parser;
+pub use graph::eval::EvalSuccess;
 
 pub mod error;
 pub mod flags;
-pub mod matcher;
-pub mod node;
-pub mod parser;
-mod stream;
-mod visitor;
+mod node;
+mod parser;
+
+mod graph;
 
 pub use flags::Flags;
-pub use parser::ParsedRegex;
+pub use graph::Regex;
+use parser::Parser;
+
+pub trait ParseFlags {
+    fn parse(self) -> Result<Flags, Error>;
+}
+
+impl ParseFlags for &str {
+    fn parse(self) -> Result<Flags, Error> {
+        Flags::from_str(self).map_err(Into::into)
+    }
+}
+
+impl ParseFlags for Flags {
+    fn parse(self) -> Result<Flags, Error> {
+        Ok(self)
+    }
+}
+
+pub fn compile(input: &str, flags: impl ParseFlags) -> Result<Regex, Error> {
+    // The pattern is parsed before the flags, so a malformed pattern is the error reported first.
+    let ast = Parser::new(input.as_bytes()).parse_all()?;
+    flags.parse().map(|flags| graph::compile(ast, flags))
+}
 
 #[cfg(test)]
 #[test]
 pub fn test() {
-    use parser::Parser;
-
-    use crate::matcher::Matcher;
+    fn assert_matches_groups(regex: &Regex, input: &str, groups: &[&str]) {
+        let res = regex.eval(input).unwrap();
 
-    fn matches(regex: &str, input: &str) -> bool {
-        let nodes = Parser::new(regex.as_bytes()).parse_all().unwrap();
-        let mut matcher = Matcher::new(&nodes, input.as_bytes());
-        matcher.matches()
+        for (&expected, got) in groups.iter().zip(&res.groups[1..]) {
+            let (from, to, _) = got.expect("no group");
+            assert_eq!(expected, &input[from as usize..to as usize]);
+        }
     }
 
-    fn matches_groups(regex: &str, input: &str, groups: &[&str]) -> bool {
-        let nodes = Parser::new(regex.as_bytes()).parse_all().unwrap();
-        let mut matcher = Matcher::new(&nodes, input.as_bytes());
-        matcher.matches()
-            && nodes.group_count - 1 == groups.len()
-            && matcher
-                .groups
-                .iter()
-                .skip(1)
-                .zip(groups)
-                .all(|(group, expected)| group.map(|range| &input[range]) == Some(*expected))
-    }
+    let hex_regex = compile(
+        "^#?([0-9a-fA-F]{2})([0-9a-fA-F]{2})([0-9a-fA-F]{2})([0-9a-fA-F]{2})$",
+        "",
+    )
+    .unwrap();
+    assert!(hex_regex.matches("#aabbccdd"));
+    assert!(!hex_regex.matches("#AAb"));
+    assert!(hex_regex.matches("#aBcDEEf0"));
 
-    const HEX_REGEX: &str = "^#?([0-9a-fA-F]{2})([0-9a-fA-F]{2})([0-9a-fA-F]{2})([0-9a-fA-F]{2})$";
-    assert!(matches(HEX_REGEX, "#aabbccdd"));
-    assert!(!matches(HEX_REGEX, "#AAb"));
-    assert!(matches(HEX_REGEX, "#aBcDEEf0"));
+    assert!(compile("\\d", "").unwrap().matches("a1"));
+    assert!(compile("V\\dX", "").unwrap().matches("aV1aVaXaV1Xs"));
+    assert!(!compile("V\\dX", "").unwrap().matches("aV1aVaXaV?Xs"));
 
-    assert!(matches("\\d", "a1"));
-    assert!(matches("V\\dX", "aV1aVaXaV1Xs"));
-    assert!(!matches("V\\dX", "aV1aVaXaV?Xs"));
+    let rgb_regex = compile(r"rgb[\s|\(]+((?:[-\+]?\d*\.\d+%?)|(?:[-\+]?\d+%?))[,|\s]+((?:[-\+]?\d*\.\d+%?)|(?:[-\+]?\d+%?))[,|\s]+((?:[-\+]?\d*\.\d+%?)|(?:[-\+]?\d+%?))\s*\)?","").unwrap();
+    assert!(rgb_regex.matches("rgb(255, 255, 255)"));
+    assert_matches_groups(&rgb_regex, "rgb(144, 17, 9)", &["144", "17", "9"]);
 
-    const RGB: &str = r"rgb[\s|\(]+((?:[-\+]?\d*\.\d+%?)|(?:[-\+]?\d+%?))[,|\s]+((?:[-\+]?\d*\.\d+%?)|(?:[-\+]?\d+%?))[,|\s]+((?:[-\+]?\d*\.\d+%?)|(?:[-\+]?\d+%?))\s*\)?";
-    assert!(matches(RGB, "rgb(255, 255, 255)"));
-    assert!(matches_groups(RGB, "rgb(144, 17, 9)", &["144", "17", "9"]));
+    // Backtracking
+    assert_matches_groups(&compile("x(.+)x", "").unwrap(), "vxxxv", &["x"]);
+    assert_matches_groups(&compile(".(.)+abcd", "").unwrap(), "vxabcdabcabcabcabc", &["x"]);
+    assert_matches_groups(&compile("(.+)+a", "").unwrap(), "bba", &["bb"]);
+    assert_matches_groups(&compile("(.+)+ac", "").unwrap(), "bacbaabaabaa", &["b"]);
 }
diff --git a/crates/dash_regex/src/node.rs b/crates/dash_regex/src/node.rs
index 2b89bbcd..b13c2dac 100644
--- a/crates/dash_regex/src/node.rs
+++ b/crates/dash_regex/src/node.rs
@@ -9,7 +9,7 @@ pub enum CharacterClassItem {
 }
 
 #[cfg_attr(feature = "format", derive(Serialize, Deserialize))]
-#[derive(Debug, Clone, PartialEq)]
+#[derive(Debug, Clone, Copy, PartialEq)]
 pub enum GroupCaptureMode {
     /// `(?:...)`
     None,
@@ -24,8 +24,8 @@ pub enum Node {
     MetaSequence(MetaSequence),
     Repetition {
         node: Box<Node>,
-        min: usize,
-        max: Option<usize>,
+        min: u32,
+        max: Option<u32>,
     },
     LiteralCharacter(u8),
     CharacterClass(Vec<CharacterClassItem>),
@@ -36,14 +36,14 @@ pub enum Node {
 }
 
 impl Node {
-    pub fn unbounded_max_repetition(node: Node, min: usize) -> Self {
+    pub fn unbounded_max_repetition(node: Node, min: u32) -> Self {
         Self::Repetition {
             node: Box::new(node),
             min,
             max: None,
         }
     }
-    pub fn repetition(node: Node, min: usize, max: usize) -> Self {
+    pub fn repetition(node: Node, min: u32, max: u32) -> Self {
         Self::Repetition {
             node: Box::new(node),
             min,
@@ -56,15 +56,25 @@ impl Node {
 }
 
 #[cfg_attr(feature = "format", derive(Serialize, Deserialize))]
-#[derive(Debug, Clone, PartialEq)]
+#[derive(Debug, Clone, Copy, PartialEq)]
 pub enum MetaSequence {
     Digit,
     Word,
     Whitespace,
 }
 
+impl MetaSequence {
+    pub fn matches(self, c: u8) -> bool {
+        match self {
+            MetaSequence::Digit => c.is_ascii_digit(),
+            MetaSequence::Word => c.is_ascii_alphanumeric() || c == b'_',
+            MetaSequence::Whitespace => c.is_ascii_whitespace(),
+        }
+    }
+}
+
 #[cfg_attr(feature = "format", derive(Serialize, Deserialize))]
-#[derive(Debug, Clone, PartialEq)]
+#[derive(Debug, Clone, Copy, PartialEq)]
 pub enum Anchor {
     StartOfString,
     EndOfString,
diff --git a/crates/dash_regex/src/parser.rs b/crates/dash_regex/src/parser.rs
index aaa23eee..100c435b 100644
--- a/crates/dash_regex/src/parser.rs
+++ b/crates/dash_regex/src/parser.rs
@@ -1,7 +1,5 @@
 use std::mem;
 
-use serde::{Deserialize, Serialize};
-
 use crate::error::Error;
 use crate::node::{Anchor, CharacterClassItem, GroupCaptureMode, MetaSequence, Node};
 
@@ -12,7 +10,7 @@ pub struct Parser<'a> {
 }
 
 #[derive(Debug, Clone)]
-#[cfg_attr(feature = "format", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "format", derive(serde::Serialize, serde::Deserialize))]
 pub struct ParsedRegex {
     pub nodes: Vec<Node>,
     pub group_count: usize,
@@ -101,12 +99,14 @@ impl<'a> Parser<'a> {
         Ok(node)
     }
 
-    fn read_int(&mut self) -> Result<usize, Error> {
-        let mut number = 0;
+    fn read_u32(&mut self) -> Result<u32, Error> {
+        let mut number = 0u32;
         while let Some(byte) = self.current() {
             match byte {
                 b'0'..=b'9' => {
-                    number = number * 10 + (byte - b'0') as usize;
+                    number = number.checked_mul(10).ok_or(Error::Overflow)?;
+                    number = number.checked_add((byte - b'0') as u32).ok_or(Error::Overflow)?;
+
                     self.advance();
                 }
                 _ => return Ok(number),
@@ -116,7 +116,7 @@ impl<'a> Parser<'a> {
     }
 
     fn parse_bounded_repetition(&mut self, node: Node) -> Result<Node, Error> {
-        let min = self.read_int()?;
+        let min = self.read_u32()?;
         match self.current() {
             Some(b',') => {
                 self.advance();
@@ -126,7 +126,7 @@ impl<'a> Parser<'a> {
                         Ok(Node::unbounded_max_repetition(node, min))
                     }
                     _ => {
-                        let max = self.read_int()?;
+                        let max = self.read_u32()?;
                         self.advance(); // }
                         Ok(Node::repetition(node, min, max))
                     }
diff --git a/crates/dash_vm/src/dispatch.rs b/crates/dash_vm/src/dispatch.rs
index 4a6d418d..68167b64 100755
--- a/crates/dash_vm/src/dispatch.rs
+++ b/crates/dash_vm/src/dispatch.rs
@@ -618,9 +618,9 @@ mod handlers {
 
     pub fn regex_constant(mut cx: DispatchContext<'_>) -> Result<Option<HandleResult>, Unrooted> {
         let id = cx.fetchw_and_inc_ip();
-        let (nodes, flags, source) = &cx.constants().regexes[RegexConstant(id)];
+        let (regex, source) = &cx.constants().regexes[RegexConstant(id)];
 
-        let regex = RegExp::new(nodes.clone(), *flags, JsString::from(*source), &cx.scope);
+        let regex = RegExp::new(regex.clone(), JsString::from(*source), &cx.scope);
         let regex = cx.scope.register(regex);
         cx.push_stack(Value::object(regex).into());
         Ok(None)
diff --git a/crates/dash_vm/src/gc/trace.rs b/crates/dash_vm/src/gc/trace.rs
index 3e0e49b6..236fc72c 100644
--- a/crates/dash_vm/src/gc/trace.rs
+++ b/crates/dash_vm/src/gc/trace.rs
@@ -6,7 +6,7 @@ use std::rc::Rc;
 
 use dash_middle::compiler::constant::ConstantPool;
 use dash_middle::interner::StringInterner;
-use dash_regex::{Flags, ParsedRegex};
+use dash_regex::Regex;
 
 use crate::value::Unrooted;
 use crate::value::primitive::{Null, Number, Undefined};
@@ -184,7 +184,7 @@ unsafe impl Trace for dash_middle::compiler::constant::Function {
         booleans.as_slice().trace(cx);
         functions.as_slice().trace(cx);
 
-        for (ParsedRegex { .. }, Flags { .. }, sym) in regexes.as_slice() {
+        for (Regex { .. }, sym) in regexes.as_slice() {
             sym.trace(cx);
         }
     }
diff --git a/crates/dash_vm/src/js_std/regex.rs b/crates/dash_vm/src/js_std/regex.rs
index a83d1025..1569ebc0 100644
--- a/crates/dash_vm/src/js_std/regex.rs
+++ b/crates/dash_vm/src/js_std/regex.rs
@@ -6,9 +6,7 @@ use crate::value::ops::conversions::ValueConversion;
 use crate::value::regex::{RegExp, RegExpInner};
 use crate::value::{Value, ValueContext};
 use dash_middle::interner::sym;
-use dash_regex::Flags;
-use dash_regex::matcher::Matcher as RegexMatcher;
-use dash_regex::parser::Parser as RegexParser;
+use dash_regex::{EvalSuccess, Flags};
 
 use super::receiver_t;
 
@@ -26,7 +24,7 @@ pub fn constructor(cx: CallContext) -> Result<Value, Value> {
         None => Flags::empty(),
     };
 
-    let nodes = match RegexParser::new(pattern.res(cx.scope).as_bytes()).parse_all() {
+    let nodes = match dash_regex::compile(pattern.res(cx.scope), flags) {
         Ok(nodes) => nodes,
         Err(err) => throw!(cx.scope, SyntaxError, "Regex parser error: {}", err),
     };
@@ -34,7 +32,6 @@ pub fn constructor(cx: CallContext) -> Result<Value, Value> {
     let new_target = cx.new_target.unwrap_or(cx.scope.statics.regexp_ctor);
     let regex = RegExp::with_obj(
         nodes,
-        flags,
         pattern,
         NamedObject::instance_for_new_target(new_target, cx.scope)?,
     );
@@ -47,35 +44,32 @@ pub fn test(cx: CallContext) -> Result<Value, Value> {
 
     let regex = receiver_t::<RegExp>(cx.scope, &cx.this, "RegExp.prototype.test")?;
 
-    let RegExpInner {
-        regex,
-        last_index,
-        flags,
-        ..
-    } = match regex.inner() {
+    let RegExpInner { regex, last_index, .. } = match regex.inner() {
         Some(nodes) => nodes,
         None => throw!(cx.scope, TypeError, "Receiver must be an initialized RegExp object"),
     };
 
     let text = text.res(cx.scope);
-    let is_global = flags.contains(Flags::GLOBAL);
+    let is_global = regex.flags().contains(Flags::GLOBAL);
 
     if is_global && last_index.get() >= text.len() {
         last_index.set(0);
         return Ok(Value::boolean(false));
     }
 
-    let mut matcher = RegexMatcher::new(regex, &text.as_bytes()[last_index.get()..]);
-    if matcher.matches() {
-        if is_global {
-            last_index.set(last_index.get() + matcher.groups.get(0).unwrap().end);
+    match regex.eval(&text[last_index.get()..]) {
+        Ok(EvalSuccess { groups }) => {
+            if is_global {
+                last_index.set(last_index.get() + groups[0].unwrap().1 as usize);
+            }
+            Ok(Value::boolean(true))
         }
-        Ok(Value::boolean(true))
-    } else {
-        if is_global {
-            last_index.set(0);
+        Err(_) => {
+            if is_global {
+                last_index.set(0);
+            }
+            Ok(Value::boolean(false))
         }
-        Ok(Value::boolean(false))
     }
 }
 
@@ -84,49 +78,44 @@ pub fn exec(cx: CallContext<'_, '_>) -> Result<Value, Value> {
 
     let regex = receiver_t::<RegExp>(cx.scope, &cx.this, "RegExp.prototype.exec")?;
 
-    let RegExpInner {
-        regex,
-        last_index,
-        flags,
-        ..
-    } = match regex.inner() {
+    let RegExpInner { regex, last_index, .. } = match regex.inner() {
         Some(nodes) => nodes,
         None => throw!(cx.scope, TypeError, "Receiver must be an initialized RegExp object"),
     };
 
     let text = text.res(cx.scope).to_owned();
-    let is_global = flags.contains(Flags::GLOBAL);
+    let is_global = regex.flags().contains(Flags::GLOBAL);
 
     if is_global && last_index.get() >= text.len() {
         last_index.set(0);
         return Ok(Value::null());
     }
 
-    let mut matcher = RegexMatcher::new(regex, &text.as_bytes()[last_index.get()..]);
-    if matcher.matches() {
-        if is_global {
-            last_index.set(last_index.get() + matcher.groups.get(0).unwrap().end);
+    match regex.eval(&text[last_index.get()..]) {
+        Ok(EvalSuccess { groups }) => {
+            if is_global {
+                last_index.set(last_index.get() + groups[0].unwrap().1 as usize);
+            }
+
+            let groups = groups
+                .into_iter()
+                .map(|group| {
+                    let sub = match group {
+                        Some((from, to, _)) => cx.scope.intern(&text[from as usize..to as usize]).into(),
+                        None => sym::null.into(),
+                    };
+                    PropertyValue::static_default(Value::string(sub))
+                })
+                .collect();
+
+            let groups = Array::from_vec(groups, cx.scope);
+            Ok(Value::object(cx.scope.register(groups)))
         }
-
-        let groups = matcher
-            .groups
-            .iter()
-            .map(|g| {
-                let sub = match g {
-                    Some(r) => cx.scope.intern(&text[r]).into(),
-                    None => sym::null.into(),
-                };
-                PropertyValue::static_default(Value::string(sub))
-            })
-            .collect();
-
-        let groups = Array::from_vec(groups, cx.scope);
-        Ok(Value::object(cx.scope.register(groups)))
-    } else {
-        if is_global {
-            last_index.set(0);
+        Err(_) => {
+            if is_global {
+                last_index.set(0);
+            }
+            Ok(Value::null())
         }
-
-        Ok(Value::null())
     }
 }
diff --git a/crates/dash_vm/src/value/regex.rs b/crates/dash_vm/src/value/regex.rs
index 96a3e7ba..de245aaf 100644
--- a/crates/dash_vm/src/value/regex.rs
+++ b/crates/dash_vm/src/value/regex.rs
@@ -1,7 +1,7 @@
 use std::cell::Cell;
 
 use dash_proc_macro::Trace;
-use dash_regex::{Flags, ParsedRegex};
+use dash_regex::Regex;
 
 use crate::gc::trace::{Trace, TraceCtxt};
 use crate::{Vm, delegate, extract};
@@ -11,8 +11,7 @@ use super::string::JsString;
 
 #[derive(Debug)]
 pub struct RegExpInner {
-    pub regex: ParsedRegex,
-    pub flags: Flags,
+    pub regex: Regex,
     pub source: JsString,
     pub last_index: Cell<usize>,
 }
@@ -21,7 +20,6 @@ unsafe impl Trace for RegExpInner {
     fn trace(&self, cx: &mut TraceCtxt<'_>) {
         let Self {
             regex: _,
-            flags: _,
             source,
             last_index: _,
         } = self;
@@ -36,11 +34,10 @@ pub struct RegExp {
 }
 
 impl RegExp {
-    pub fn new(regex: ParsedRegex, flags: Flags, source: JsString, vm: &Vm) -> Self {
+    pub fn new(regex: Regex, source: JsString, vm: &Vm) -> Self {
         Self {
             inner: Some(RegExpInner {
                 regex,
-                flags,
                 source,
                 last_index: Cell::new(0),
             }),
@@ -48,11 +45,10 @@ impl RegExp {
         }
     }
 
-    pub fn with_obj(regex: ParsedRegex, flags: Flags, source: JsString, object: NamedObject) -> Self {
+    pub fn with_obj(regex: Regex, source: JsString, object: NamedObject) -> Self {
         Self {
             inner: Some(RegExpInner {
                 regex,
-                flags,
                 source,
                 last_index: Cell::new(0),
             }),

From c47f684e9ac5ffb470b75f21ee5ea67d9302f2ab Mon Sep 17 00:00:00 2001
From: y21 <30553356+y21@users.noreply.github.com>
Date: Mon, 31 Mar 2025 20:07:28 +0200
Subject: [PATCH 2/5] reimplement repetition state as a stack

---
 .gitignore                          |   2 +
 Cargo.lock                          |  17 ++-
 crates/dash_regex/src/graph/eval.rs | 174 ++++++++++++++++------------
 crates/dash_regex/src/lib.rs        |  14 ++-
 4 files changed, 129 insertions(+), 78 deletions(-)

diff --git a/.gitignore b/.gitignore
index 38b8d1c2..47b6d028 100755
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,5 @@ target
 *.json
 *.sh
 dash-cli/tests
+typeck
+jimp-testing
diff --git a/Cargo.lock b/Cargo.lock
index 4c48cf89..5909cd75 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -157,9 +157,12 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
 
 [[package]]
 name = "cc"
-version = "1.0.88"
+version = "1.2.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "02f341c093d19155a6e41631ce5971aac4e9a868262212153124c15fa22d1cdc"
+checksum = "be714c154be609ec7f5dad223a33bf1482fff90472de28f7362806e6d4832b8c"
+dependencies = [
+ "shlex",
+]
 
 [[package]]
 name = "cfg-if"
@@ -1084,9 +1087,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
 
 [[package]]
 name = "libc"
-version = "0.2.153"
+version = "0.2.170"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
+checksum = "875b3680cb2f8f71bdcf9a30f38d48282f5d3c95cbf9b3fa57269bb5d5c06828"
 
 [[package]]
 name = "libloading"
@@ -1932,6 +1935,12 @@ dependencies = [
  "lazy_static",
 ]
 
+[[package]]
+name = "shlex"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
+
 [[package]]
 name = "signal-hook-registry"
 version = "1.4.1"
diff --git a/crates/dash_regex/src/graph/eval.rs b/crates/dash_regex/src/graph/eval.rs
index 8867c3dc..c71d5231 100644
--- a/crates/dash_regex/src/graph/eval.rs
+++ b/crates/dash_regex/src/graph/eval.rs
@@ -1,3 +1,5 @@
+use std::cell::Cell;
+
 use crate::graph::node::CharacterClassItem;
 use crate::node::Anchor;
 
@@ -10,7 +12,7 @@ pub enum ProcessedGroupState {
     Unconfirmed,
 }
 
-struct Cx<'a> {
+struct Shared<'a> {
     processed_groups: &'a mut [Option<(u32, u32, ProcessedGroupState)>],
     pending_groups: &'a mut [(Option<u32>, Option<u32>)],
     /// The full input source of this "attempt".
@@ -18,69 +20,75 @@ struct Cx<'a> {
     graph: &'a Graph,
     /// The offset of `full_input` in the *original* input string.
     offset_from_original: u32,
-    current_repetition_count: Option<u32>,
 }
-
-impl Cx<'_> {
+impl Shared<'_> {
     /// Returns the offset of the passed in slice relative to the full input.
     /// The slice must actually be obtained from the full input for the return value to make sense.
     /// The value is unspecified (but not undefined) if passed an input slice from somewhere else.
-    pub fn offset(&self, s: &[u8]) -> u32 {
-        (s.as_ptr().addr() - self.full_input.as_ptr().addr()) as u32
+    pub fn offset_of(&self, remaining: &[u8]) -> u32 {
+        (remaining.as_ptr().addr() - self.full_input.as_ptr().addr()) as u32
     }
 
     /// Same as `offset`, but returns it relative to the original input.
-    pub fn offset_from_original(&self, s: &[u8]) -> u32 {
-        self.offset_from_original + self.offset(s)
+    pub fn offset_of_from_original(&self, remaining: &[u8]) -> u32 {
+        self.offset_from_original + self.offset_of(remaining)
     }
+}
 
-    /// Creates a new context usable for the specified node.
-    pub fn for_node(&mut self, node: NodeId, origin: NodeId) -> Cx<'_> {
-        let Self {
-            processed_groups: &mut ref mut processed_groups,
-            pending_groups: &mut ref mut pending_groups,
-            full_input,
-            graph,
-            offset_from_original,
-            mut current_repetition_count,
-        } = *self;
+#[derive(Debug, Clone)]
+struct Cx<'a> {
+    /// How many iterations have matched so far
+    current_repetition_count: Cell<Option<u32>>,
+    /// The offset at the start of this iteration, used to determine if we're making any progress.
+    /// If this is the same as the offset at the end of an iteration, we can return true early as it will match forever.
+    current_repetition_start: Cell<Option<u32>>,
+    parent: Option<&'a Cx<'a>>,
+}
+
+impl<'a> Cx<'a> {
+    pub fn for_node(&'a self, shared: &Shared<'_>, target: NodeId, origin: NodeId, remaining: &[u8]) -> Cx<'a> {
+        let mut current_repetition_count = self.current_repetition_count.clone();
+        let mut current_repetition_start = self.current_repetition_start.clone();
+        let mut parent = self.parent;
 
-        if let NodeKind::RepetitionStart { .. } = graph[node].kind {
-            if let NodeKind::RepetitionEnd { .. } = graph[origin].kind {
-                current_repetition_count = Some(current_repetition_count.unwrap() + 1);
+        // Moving to a RepetitionStart means we either prepare/initialize a repetition (set to 0),
+        // or increment it if we're coming from a RepetitionEnd specifically.
+        if let NodeKind::RepetitionStart { .. } = shared.graph[target].kind {
+            current_repetition_start = Cell::new(Some(shared.offset_of(remaining)));
+
+            if let NodeKind::RepetitionEnd { .. } = shared.graph[origin].kind {
+                *current_repetition_count.get_mut().as_mut().unwrap() += 1;
             } else {
-                current_repetition_count = Some(0);
+                current_repetition_count = Cell::new(Some(0));
+                parent = Some(self);
             }
         }
 
         Cx {
-            processed_groups,
-            pending_groups,
-            full_input,
-            graph,
-            offset_from_original,
             current_repetition_count,
+            current_repetition_start,
+            parent,
         }
     }
 }
 
-fn step(mut cx: Cx, node_id: NodeId, mut input: &[u8]) -> bool {
+fn step(shared: &mut Shared<'_>, cx: Cx<'_>, node_id: NodeId, mut remaining: &[u8]) -> bool {
     // The reason for shadowing cx with a borrow here is so that you're forced to go through `Cx::for_node` when calling `step(...)`.
     // You can't pass the same `cx` when evaluating a sub-node.
-    let cx = &mut cx;
-    let node = &cx.graph[node_id];
+    let mut cx = &cx;
+    let node = &shared.graph[node_id];
 
     let mut matches = match node.kind {
         NodeKind::AnyCharacter => {
-            if let Some(rest) = input.get(1..) {
-                input = rest;
+            if let Some(rest) = remaining.get(1..) {
+                remaining = rest;
                 true
             } else {
                 false
             }
         }
         NodeKind::RepetitionStart { min, max, inner } => 'arm: {
-            let current_repetition_count = cx.current_repetition_count.unwrap();
+            let current_repetition_count = cx.current_repetition_count.get().unwrap();
 
             if let Some(max) = max {
                 if current_repetition_count >= max {
@@ -89,24 +97,30 @@ fn step(mut cx: Cx, node_id: NodeId, mut input: &[u8]) -> bool {
                 }
             }
 
-            if step(cx.for_node(inner, node_id), inner, input) {
-                // This has automatically also checked the rest input. Don't need to do that again here after the match.
+            if step(shared, cx.for_node(shared, inner, node_id, remaining), inner, remaining) {
+                // This has automatically also checked the rest input. Don't (shouldn't) need to do that again here after the match.
                 return true;
             }
+
+            // Getting here means the regex cannot match the string with another repetition iteration,
+            // and we are on track to backtrack.
+            // This requires us to "pop" the current repetition and continue with the outer/parent repetition context,
+            // as this might be a nested repetition.
+            cx = cx.parent.unwrap();
             current_repetition_count >= min
         }
-        NodeKind::Anchor(Anchor::StartOfString) => input.len() == cx.full_input.len(),
-        NodeKind::Anchor(Anchor::EndOfString) => input.is_empty(),
+        NodeKind::Anchor(Anchor::StartOfString) => remaining.len() == shared.full_input.len(),
+        NodeKind::Anchor(Anchor::EndOfString) => remaining.is_empty(),
         NodeKind::Meta(meta) => {
-            if let Some((_, rest)) = input.split_first().filter(|&(&c, _)| meta.matches(c)) {
-                input = rest;
+            if let Some((_, rest)) = remaining.split_first().filter(|&(&c, _)| meta.matches(c)) {
+                remaining = rest;
                 true
             } else {
                 false
             }
         }
         NodeKind::CharacterClass(ref items) => {
-            if let Some((_, rest)) = input.split_first().filter(|&(&c, _)| {
+            if let Some((_, rest)) = remaining.split_first().filter(|&(&c, _)| {
                 items.iter().copied().any(|item| match item {
                     CharacterClassItem::Literal(lit) => lit == c,
                     CharacterClassItem::AnyCharacter => true,
@@ -114,76 +128,80 @@ fn step(mut cx: Cx, node_id: NodeId, mut input: &[u8]) -> bool {
                     CharacterClassItem::Range(min, max) => (min..=max).contains(&c),
                 })
             }) {
-                input = rest;
+                remaining = rest;
                 true
             } else {
                 false
             }
         }
         NodeKind::Literal(lit) => {
-            if let Some((_, rest)) = input.split_first().filter(|&(&c, _)| c == lit) {
-                input = rest;
+            if let Some((_, rest)) = remaining.split_first().filter(|&(&c, _)| c == lit) {
+                remaining = rest;
                 true
             } else {
                 false
             }
         }
         NodeKind::Or(left, right) => {
-            return step(cx.for_node(left, node_id), left, input) || step(cx.for_node(right, node_id), right, input);
+            return step(shared, cx.for_node(shared, left, node_id, remaining), left, remaining)
+                || step(shared, cx.for_node(shared, right, node_id, remaining), right, remaining);
         }
         NodeKind::RepetitionEnd { start } => {
-            return step(cx.for_node(start, node_id), start, input);
+            let end_off = shared.offset_of(remaining);
+            if cx.current_repetition_start.get().unwrap() == end_off {
+                // We haven't made any progress in this repetition iteration and won't.
+                return true;
+            } else {
+                return step(shared, cx.for_node(shared, start, node_id, remaining), start, remaining);
+            }
         }
         NodeKind::GroupStart { group_id } => {
             if let Some(group_id) = group_id {
-                let offset = cx.offset_from_original(input);
-                cx.pending_groups[group_id as usize] = (Some(offset), None);
+                let offset = shared.offset_of_from_original(remaining);
+                shared.pending_groups[group_id as usize] = (Some(offset), None);
             }
             true
         }
         NodeKind::GroupEnd { group_id } => {
             if let Some(group_id) = group_id {
                 let group_id = group_id as usize;
-
-                let old = cx.processed_groups[group_id];
-                let start = cx.pending_groups[group_id].0.unwrap();
-                let end = cx.offset_from_original(input);
-                cx.processed_groups[group_id] = Some((start, end, ProcessedGroupState::Unconfirmed));
+                let old = shared.processed_groups[group_id];
+                let start = shared.pending_groups[group_id].0.unwrap();
+                let end = shared.offset_of_from_original(remaining);
+                shared.processed_groups[group_id] = Some((start, end, ProcessedGroupState::Unconfirmed));
 
                 return if let Some(next) = node.next {
-                    let matches = step(cx.for_node(next, node_id), next, input);
-                    cx.pending_groups[group_id] = (Some(start), Some(end));
-
+                    let matches = step(shared, cx.for_node(shared, next, node_id, remaining), next, remaining);
+                    shared.pending_groups[group_id] = (Some(start), Some(end));
                     if matches {
-                        if cx.processed_groups[group_id].is_none_or(|(.., s)| s == ProcessedGroupState::Unconfirmed) {
+                        if shared.processed_groups[group_id].is_none_or(|(.., s)| s == ProcessedGroupState::Unconfirmed)
+                        {
                             // This group may have been processed again in a subsequent iteration.
                             // Only overwrite it back with this iteration's if it's still unconfirmed
-                            cx.processed_groups[group_id] = Some((start, end, ProcessedGroupState::Confirmed));
+                            shared.processed_groups[group_id] = Some((start, end, ProcessedGroupState::Confirmed));
                         }
-
                         true
                     } else {
                         // We did not match. Restore to old.
                         if let Some((a, b, _)) = old {
-                            cx.processed_groups[group_id] = Some((a, b, ProcessedGroupState::Unconfirmed));
+                            shared.processed_groups[group_id] = Some((a, b, ProcessedGroupState::Unconfirmed));
                         } else {
-                            cx.processed_groups[group_id] = None;
+                            shared.processed_groups[group_id] = None;
                         }
                         false
                     }
                 } else {
                     // No next node.
-                    cx.processed_groups[group_id].as_mut().unwrap().2 = ProcessedGroupState::Confirmed;
+                    shared.processed_groups[group_id].as_mut().unwrap().2 = ProcessedGroupState::Confirmed;
                     true
                 };
             }
-
             true
         }
     };
 
     if let Some(next) = node.next {
-        matches = matches && step(cx.for_node(next, node_id), next, input);
+        matches = matches && step(shared, cx.for_node(shared, next, node_id, remaining), next, remaining);
     }
     matches
 }
@@ -215,20 +233,32 @@ pub fn eval(regex: &Regex, mut input: &[u8]) -> Result<EvalSuccess, NoMatch> {
         processed_groups[1..].fill(None);
         pending_groups.fill((None, None));
 
-        let cx = Cx {
-            processed_groups: &mut processed_groups,
-            pending_groups: &mut pending_groups,
-            current_repetition_count: if let NodeKind::RepetitionStart { .. } = regex.graph[root].kind {
-                Some(0)
+        let outer_cx: Cx<'_> = Cx {
+            current_repetition_count: Cell::new(None),
+            current_repetition_start: Cell::new(None),
+            parent: None,
+        };
+        let (current_repetition_count, current_repetition_start, outer_cx) =
+            if let NodeKind::RepetitionStart { .. } = regex.graph[root].kind {
+                (Some(0), Some(0), Some(&outer_cx))
             } else {
-                None
-            },
-            offset_from_original,
+                (None, None, None)
+            };
+
+        let mut shared = Shared {
             full_input: input,
             graph: &regex.graph,
+            offset_from_original,
+            pending_groups: &mut pending_groups,
+            processed_groups: &mut processed_groups,
+        };
+        let cx = Cx {
+            current_repetition_count: Cell::new(current_repetition_count),
+            current_repetition_start: Cell::new(current_repetition_start),
+            parent: outer_cx,
         };
 
-        if step(cx, root, input) {
+        if step(&mut shared, cx, root, input) {
             return Ok(EvalSuccess {
                 groups: processed_groups,
             });
diff --git a/crates/dash_regex/src/lib.rs b/crates/dash_regex/src/lib.rs
index 09e9c150..860203a3 100644
--- a/crates/dash_regex/src/lib.rs
+++ b/crates/dash_regex/src/lib.rs
@@ -68,6 +68,16 @@ pub fn test() {
     // Backtracking
     assert_matches_groups(&compile("x(.+)x", "").unwrap(), "vxxxv", &["x"]);
     assert_matches_groups(&compile(".(.)+abcd", "").unwrap(), "vxabcdabcabcabcabc", &["x"]);
-    assert_matches_groups(&compile("(.+)+a", "").unwrap(), "bba", &["bb"]);
-    assert_matches_groups(&compile("(.+)+ac", "").unwrap(), "bacbaabaabaa", &["b"]);
+    assert_matches_groups(&compile("(.+)+a", "").unwrap(), "ba", &["b"]);
+    // Degenerate backtracking
+    assert_matches_groups(&compile("(.+)+ac", "").unwrap(), "bacbaabaabaabaa", &["b"]);
+
+    assert_matches_groups(&compile("(ab+){3,}", "").unwrap(), "ababab", &["ab"]);
+    assert_matches_groups(&compile("(([ab]+)b){3,}", "").unwrap(), "abababaa", &["ab", "a"]);
+    assert!(compile("(([ab]+)b){3,}", "").unwrap().eval("ababaaaa").is_err());
+    assert!(compile("(([ab]+)b){3,}", "").unwrap().eval("ababaaba").is_ok());
+
+    // Infinite regex needs to terminate eventually
+    assert_matches_groups(&compile("(.?)+", "").unwrap(), "", &[""]);
+    assert_matches_groups(&compile("(.?)+", "").unwrap(), "aa", &["a"]);
 }

From 09411d8087e06a2979bbdd618ceb132295a30f3b Mon Sep 17 00:00:00 2001
From: y21 <30553356+y21@users.noreply.github.com>
Date: Mon, 31 Mar 2025 21:22:52 +0200
Subject: [PATCH 3/5] add `NodeKind::End` to properly indicate matched string
 length

---
 crates/dash_regex/src/graph/build.rs | 72 +++++++++++++++-------------
 crates/dash_regex/src/graph/eval.rs  | 22 +++++----
 crates/dash_regex/src/graph/mod.rs   |  2 +-
 crates/dash_regex/src/graph/node.rs  |  1 +
 4 files changed, 53 insertions(+), 44 deletions(-)

diff --git a/crates/dash_regex/src/graph/build.rs b/crates/dash_regex/src/graph/build.rs
index 59dd2fce..a635bd7d 100644
--- a/crates/dash_regex/src/graph/build.rs
+++ b/crates/dash_regex/src/graph/build.rs
@@ -45,24 +45,24 @@ pub fn number_groups(regex: &ParsedRegex) -> CaptureGroupMap {
     map
 }
 
-pub fn build(group_numbers: &CaptureGroupMap, regex: &ParsedRegex) -> (Graph, Option<NodeId>) {
+pub fn build(group_numbers: &CaptureGroupMap, regex: &ParsedRegex) -> (Graph, NodeId) {
     fn lower_repetition(
         graph: &mut BuildGraph,
         group_numbers: &CaptureGroupMap,
         node: &ParseNode,
         min: u32,
         max: Option<u32>,
-        next: Option<NodeId>,
+        next: NodeId,
     ) -> NodeId {
         let end_id = graph.push(Node {
-            next,
+            next: Some(next),
             kind: NodeKind::RepetitionEnd {
                 start: NodeId::DUMMY, // will be set later
             },
         });
-        let inner_id = inner(graph, group_numbers, slice::from_ref(node), Some(end_id)).unwrap();
+        let inner_id = inner(graph, group_numbers, slice::from_ref(node), end_id);
         let start_id = graph.push(Node {
-            next,
+            next: Some(next),
             kind: NodeKind::RepetitionStart {
                 min,
                 max,
@@ -80,26 +80,26 @@ pub fn build(group_numbers: &CaptureGroupMap, regex: &ParsedRegex) -> (Graph, Op
         graph: &mut BuildGraph,
         group_numbers: &CaptureGroupMap,
         nodes: &[ParseNode],
-        outer_next: Option<NodeId>,
-    ) -> Option<NodeId> {
+        outer_next: NodeId,
+    ) -> NodeId {
         if let Some((current, rest)) = nodes.split_first() {
             let next = inner(graph, group_numbers, rest, outer_next);
             match *current {
-                ParseNode::AnyCharacter => Some(graph.push(Node {
-                    next,
+                ParseNode::AnyCharacter => graph.push(Node {
+                    next: Some(next),
                     kind: NodeKind::AnyCharacter,
-                })),
-                ParseNode::MetaSequence(meta) => Some(graph.push(Node {
-                    next,
+                }),
+                ParseNode::MetaSequence(meta) => graph.push(Node {
+                    next: Some(next),
                     kind: NodeKind::Meta(meta),
-                })),
+                }),
                 ParseNode::Repetition { ref node, min, max } => {
-                    Some(lower_repetition(graph, group_numbers, node, min, max, next))
+                    lower_repetition(graph, group_numbers, node, min, max, next)
                 }
-                ParseNode::LiteralCharacter(literal) => Some(graph.push(Node {
-                    next,
+                ParseNode::LiteralCharacter(literal) => graph.push(Node {
+                    next: Some(next),
                     kind: NodeKind::Literal(literal),
-                })),
+                }),
                 ParseNode::CharacterClass(ref parse_items) => {
                     let items = parse_items
                         .iter()
@@ -118,35 +118,35 @@ pub fn build(group_numbers: &CaptureGroupMap, regex: &ParsedRegex) -> (Graph, Op
                         })
                         .collect::<Box<[_]>>();
 
-                    Some(graph.push(Node {
-                        next,
+                    graph.push(Node {
+                        next: Some(next),
                         kind: NodeKind::CharacterClass(items),
-                    }))
+                    })
                 }
-                ParseNode::Anchor(anchor) => Some(graph.push(Node {
-                    next,
+                ParseNode::Anchor(anchor) => graph.push(Node {
+                    next: Some(next),
                     kind: NodeKind::Anchor(anchor),
-                })),
+                }),
                 ParseNode::Or(ref left, ref right) => {
-                    let left = inner(graph, group_numbers, left, next).unwrap();
-                    let right = inner(graph, group_numbers, right, next).unwrap();
-                    Some(graph.push(Node {
-                        next,
+                    let left = inner(graph, group_numbers, left, next);
+                    let right = inner(graph, group_numbers, right, next);
+                    graph.push(Node {
+                        next: Some(next),
                         kind: NodeKind::Or(left, right),
-                    }))
+                    })
                 }
-                ParseNode::Optional(ref node) => Some(lower_repetition(graph, group_numbers, node, 0, Some(1), next)),
+                ParseNode::Optional(ref node) => lower_repetition(graph, group_numbers, node, 0, Some(1), next),
                 ParseNode::Group(_, ref nodes) => {
                     let group_id = group_numbers.get(&(current as *const ParseNode)).copied();
                     let end = graph.push(Node {
-                        next,
+                        next: Some(next),
                         kind: NodeKind::GroupEnd { group_id },
                     });
-                    let inner_id = inner(graph, group_numbers, nodes, Some(end)).unwrap();
-                    Some(graph.push(Node {
+                    let inner_id = inner(graph, group_numbers, nodes, end);
+                    graph.push(Node {
                         next: Some(inner_id),
                         kind: NodeKind::GroupStart { group_id },
-                    }))
+                    })
                 }
             }
         } else {
@@ -155,6 +155,10 @@ pub fn build(group_numbers: &CaptureGroupMap, regex: &ParsedRegex) -> (Graph, Op
     }
 
     let mut graph = BuildGraph::new();
-    let root = inner(&mut graph, group_numbers, &regex.nodes, None);
+    let end = graph.push(Node {
+        kind: NodeKind::End,
+        next: None,
+    });
+    let root = inner(&mut graph, group_numbers, &regex.nodes, end);
     (graph.finalize(), root)
 }
diff --git a/crates/dash_regex/src/graph/eval.rs b/crates/dash_regex/src/graph/eval.rs
index c71d5231..81fcfab4 100644
--- a/crates/dash_regex/src/graph/eval.rs
+++ b/crates/dash_regex/src/graph/eval.rs
@@ -20,6 +20,7 @@ struct Shared<'a> {
     graph: &'a Graph,
     /// The offset of `full_input` in the *original* input string.
     offset_from_original: u32,
+    end_offset: Option<u32>,
 }
 impl Shared<'_> {
     /// Returns the offset of the passed in slice relative to the full input.
@@ -149,7 +150,8 @@ fn step(shared: &mut Shared<'_>, cx: Cx<'_>, node_id: NodeId, mut remaining: &[u
         NodeKind::RepetitionEnd { start } => {
             let end_off = shared.offset_of(remaining);
             if cx.current_repetition_start.get().unwrap() == end_off {
-                // We haven't made any progress in this repetition iteration and won't.
+                // We haven't made any progress in this repetition iteration and won't. Treat this as the end of the regex.
+                shared.end_offset = Some(shared.offset_of(remaining));
                 return true;
             } else {
                 return step(shared, cx.for_node(shared, start, node_id, remaining), start, remaining);
@@ -198,6 +200,11 @@ fn step(shared: &mut Shared<'_>, cx: Cx<'_>, node_id: NodeId, mut remaining: &[u
             }
             true
         }
+        NodeKind::End => {
+            shared.end_offset = Some(shared.offset_of(remaining));
+            assert!(node.next.is_none());
+            return true;
+        }
     };
 
     if let Some(next) = node.next {
@@ -215,11 +222,6 @@ pub struct EvalSuccess {
 pub struct NoMatch;
 
 pub fn eval(regex: &Regex, mut input: &[u8]) -> Result<EvalSuccess, NoMatch> {
-    let Some(root) = regex.root else {
-        // Nothing to do for empty regexes.
-        return Ok(EvalSuccess { groups: Box::default() });
-    };
-
     let mut processed_groups = vec![None; regex.group_count as usize].into_boxed_slice();
     let mut pending_groups = vec![(None, None); regex.group_count as usize].into_boxed_slice();
     let mut offset_from_original = 0;
@@ -227,7 +229,7 @@ pub fn eval(regex: &Regex, mut input: &[u8]) -> Result<EvalSuccess, NoMatch> {
         // TODO: add a fast reject path where we find the first required character and seek to it in `input`
         processed_groups[0] = Some((
             offset_from_original,
-            offset_from_original + input.len() as u32,
+            offset_from_original,
             ProcessedGroupState::Confirmed,
         ));
         processed_groups[1..].fill(None);
@@ -239,7 +241,7 @@ pub fn eval(regex: &Regex, mut input: &[u8]) -> Result<EvalSuccess, NoMatch> {
             parent: None,
         };
         let (current_repetition_count, current_repetition_start, outer_cx) =
-            if let NodeKind::RepetitionStart { .. } = regex.graph[root].kind {
+            if let NodeKind::RepetitionStart { .. } = regex.graph[regex.root].kind {
                 (Some(0), Some(0), Some(&outer_cx))
             } else {
                 (None, None, None)
@@ -251,6 +253,7 @@ pub fn eval(regex: &Regex, mut input: &[u8]) -> Result<EvalSuccess, NoMatch> {
             offset_from_original,
             pending_groups: &mut pending_groups,
             processed_groups: &mut processed_groups,
+            end_offset: None,
         };
         let cx = Cx {
             current_repetition_count: Cell::new(current_repetition_count),
@@ -258,7 +261,8 @@ pub fn eval(regex: &Regex, mut input: &[u8]) -> Result<EvalSuccess, NoMatch> {
             parent: outer_cx,
         };
 
-        if step(&mut shared, cx, root, input) {
+        if step(&mut shared, cx, regex.root, input) {
+            processed_groups[0].as_mut().unwrap().1 += shared.end_offset.unwrap();
             return Ok(EvalSuccess {
                 groups: processed_groups,
             });
diff --git a/crates/dash_regex/src/graph/mod.rs b/crates/dash_regex/src/graph/mod.rs
index a0e8b11c..41dbd8c7 100644
--- a/crates/dash_regex/src/graph/mod.rs
+++ b/crates/dash_regex/src/graph/mod.rs
@@ -14,7 +14,7 @@ use crate::parser::ParsedRegex;
 pub struct Regex {
     graph: Graph,
     flags: Flags,
-    root: Option<NodeId>,
+    root: NodeId,
     group_count: u32,
 }
 
diff --git a/crates/dash_regex/src/graph/node.rs b/crates/dash_regex/src/graph/node.rs
index 07afb359..1246d52c 100644
--- a/crates/dash_regex/src/graph/node.rs
+++ b/crates/dash_regex/src/graph/node.rs
@@ -41,6 +41,7 @@ pub enum NodeKind {
     GroupEnd {
         group_id: Option<u32>,
     },
+    End,
 }
 
 #[derive(Debug, Copy, Clone)]

From 8447443bbb129983463649ecb1a5c1914e7107f1 Mon Sep 17 00:00:00 2001
From: y21 <30553356+y21@users.noreply.github.com>
Date: Mon, 31 Mar 2025 21:30:53 +0200
Subject: [PATCH 4/5] fix ^ anchors when retrying substrings

---
 crates/dash_regex/src/graph/eval.rs | 5 ++++-
 crates/dash_regex/src/lib.rs        | 4 ++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/crates/dash_regex/src/graph/eval.rs b/crates/dash_regex/src/graph/eval.rs
index 81fcfab4..69ed7a4d 100644
--- a/crates/dash_regex/src/graph/eval.rs
+++ b/crates/dash_regex/src/graph/eval.rs
@@ -110,7 +110,10 @@ fn step(shared: &mut Shared<'_>, cx: Cx<'_>, node_id: NodeId, mut remaining: &[u
             cx = cx.parent.unwrap();
             current_repetition_count >= min
         }
-        NodeKind::Anchor(Anchor::StartOfString) => remaining.len() == shared.full_input.len(),
+        NodeKind::Anchor(Anchor::StartOfString) => {
+            // Only the true start of the original input counts: no earlier failed attempt has shifted the window, and nothing is consumed yet
+            shared.offset_from_original == 0 && remaining.len() == shared.full_input.len()
+        }
         NodeKind::Anchor(Anchor::EndOfString) => remaining.is_empty(),
         NodeKind::Meta(meta) => {
             if let Some((_, rest)) = remaining.split_first().filter(|&(&c, _)| meta.matches(c)) {
diff --git a/crates/dash_regex/src/lib.rs b/crates/dash_regex/src/lib.rs
index 860203a3..69de1d07 100644
--- a/crates/dash_regex/src/lib.rs
+++ b/crates/dash_regex/src/lib.rs
@@ -80,4 +80,8 @@ pub fn test() {
     // Infinite regex needs to terminate eventually
     assert_matches_groups(&compile("(.?)+", "").unwrap(), "", &[""]);
     assert_matches_groups(&compile("(.?)+", "").unwrap(), "aa", &["a"]);
+
+    // ^ anchor must not match when retrying substrings
+    assert!(!compile("^m", "").unwrap().matches("ama"));
+    assert!(compile("^m", "").unwrap().matches("ma"));
 }

From 15e4eb32aca678e56e0f74f90ccaed508c8cb6cb Mon Sep 17 00:00:00 2001
From: y21 <30553356+y21@users.noreply.github.com>
Date: Mon, 31 Mar 2025 21:58:00 +0200
Subject: [PATCH 5/5] don't parse character class contents as arbitrary nodes

---
 crates/dash_regex/src/parser.rs | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/crates/dash_regex/src/parser.rs b/crates/dash_regex/src/parser.rs
index 100c435b..9db3f558 100644
--- a/crates/dash_regex/src/parser.rs
+++ b/crates/dash_regex/src/parser.rs
@@ -153,7 +153,9 @@ impl<'a> Parser<'a> {
                 Some(b'-') => {
                     self.advance();
                     match nodes.last() {
-                        Some(&CharacterClassItem::Node(Node::LiteralCharacter(start))) => {
+                        Some(&CharacterClassItem::Node(Node::LiteralCharacter(start)))
+                            if start.is_ascii_alphanumeric() =>
+                        {
                             let end = self.next_byte().ok_or(Error::UnexpectedEof)?;
                             nodes.pop();
                             nodes.push(CharacterClassItem::Range(start, end));
@@ -161,7 +163,15 @@ impl<'a> Parser<'a> {
                         _ => nodes.push(CharacterClassItem::Node(Node::LiteralCharacter(b'-'))),
                     }
                 }
-                _ => nodes.push(CharacterClassItem::Node(self.parse_primary()?)),
+                Some(b'\\') => {
+                    self.advance();
+                    nodes.push(CharacterClassItem::Node(self.parse_escape()?));
+                }
+                Some(other) => {
+                    self.advance();
+                    nodes.push(CharacterClassItem::Node(Node::LiteralCharacter(other)))
+                }
+                None => break,
             }
         }