Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@ target
*.json
*.sh
dash-cli/tests
typeck
jimp-testing
17 changes: 13 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 2 additions & 7 deletions crates/dash_compiler/src/instruction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,13 +133,8 @@ impl InstructionBuilder<'_, '_> {
Ok(())
}

pub fn build_regex_constant(
&mut self,
regex: dash_regex::ParsedRegex,
flags: dash_regex::Flags,
sym: Symbol,
) -> Result<(), LimitExceededError> {
let RegexConstant(id) = self.current_function_mut().cp.add_regex((regex, flags, sym))?;
pub fn build_regex_constant(&mut self, regex: dash_regex::Regex, sym: Symbol) -> Result<(), LimitExceededError> {
let RegexConstant(id) = self.current_function_mut().cp.add_regex((regex, sym))?;
self.write_instr(Instruction::Regex);
self.writew(id);
Ok(())
Expand Down
2 changes: 1 addition & 1 deletion crates/dash_compiler/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -606,7 +606,7 @@ impl Visitor<Result<(), Error>> for FunctionCompiler<'_> {
LiteralExpr::Number(n) => ib.build_number_constant(n),
LiteralExpr::String(s) => ib.build_string_constant(s),
LiteralExpr::Identifier(_) => unreachable!("identifiers are handled in visit_identifier_expression"),
LiteralExpr::Regex(regex, flags, sym) => ib.build_regex_constant(regex, flags, sym),
LiteralExpr::Regex(regex, sym) => ib.build_regex_constant(regex, sym),
LiteralExpr::Null => ib.build_null_constant(),
LiteralExpr::Undefined => ib.build_undefined_constant(),
};
Expand Down
2 changes: 1 addition & 1 deletion crates/dash_decompiler/src/decompiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ impl<'interner, 'buf> FunctionDecompiler<'interner, 'buf> {
),
Instruction::Regex => (
"regex",
&self.interner.resolve(self.constants.regexes[RegexConstant(id)].2) as &dyn fmt::Display,
&self.interner.resolve(self.constants.regexes[RegexConstant(id)].1) as &dyn fmt::Display,
),
_ => unreachable!(),
};
Expand Down
6 changes: 3 additions & 3 deletions crates/dash_middle/src/compiler/constant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use core::fmt;
use std::cell::Cell;
use std::rc::Rc;

use dash_regex::{Flags, ParsedRegex};
use dash_regex::Regex;

use crate::index_type;
use crate::indexvec::IndexThinVec;
Expand Down Expand Up @@ -96,7 +96,7 @@ pub struct ConstantPool {
pub symbols: IndexThinVec<Symbol, SymbolConstant>,
pub booleans: IndexThinVec<bool, BooleanConstant>,
pub functions: IndexThinVec<Rc<Function>, FunctionConstant>,
pub regexes: IndexThinVec<(ParsedRegex, Flags, Symbol), RegexConstant>,
pub regexes: IndexThinVec<(Regex, Symbol), RegexConstant>,
}

pub struct LimitExceededError;
Expand All @@ -120,6 +120,6 @@ impl ConstantPool {
add_symbol(symbols, Symbol) -> SymbolConstant,
add_boolean(booleans, bool) -> BooleanConstant,
add_function(functions, Rc<Function>) -> FunctionConstant,
add_regex(regexes, (ParsedRegex, Flags, Symbol)) -> RegexConstant
add_regex(regexes, (Regex, Symbol)) -> RegexConstant
);
}
8 changes: 4 additions & 4 deletions crates/dash_middle/src/parser/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,8 @@ impl ExprKind {
Self::Literal(LiteralExpr::Undefined)
}

pub fn regex_literal(regex: dash_regex::ParsedRegex, flags: dash_regex::Flags, source: Symbol) -> Self {
Self::Literal(LiteralExpr::Regex(regex, flags, source))
pub fn regex_literal(regex: dash_regex::Regex, source: Symbol) -> Self {
Self::Literal(LiteralExpr::Regex(regex, source))
}

/// Creates a function call expression
Expand Down Expand Up @@ -551,8 +551,8 @@ pub enum LiteralExpr {
#[display(fmt = "\"{_0}\"")]
String(Symbol),

#[display(fmt = "/{_2}/")]
Regex(dash_regex::ParsedRegex, dash_regex::Flags, Symbol),
#[display(fmt = "/{_1}/")]
Regex(dash_regex::Regex, Symbol),

#[display(fmt = "null")]
Null,
Expand Down
14 changes: 4 additions & 10 deletions crates/dash_parser/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ use dash_middle::parser::statement::{
StatementKind,
};
use dash_middle::sourcemap::Span;
use dash_regex::Flags;

use crate::{Parser, any};

Expand Down Expand Up @@ -874,14 +873,9 @@ impl Parser<'_, '_> {
// Trim / prefix and suffix
let full = self.interner.resolve(literal);
let full = &full[1..full.len() - 1];
let (nodes, flags) = match dash_regex::Parser::new(full.as_bytes()).parse_all().and_then(|node| {
self.interner
.resolve(flags)
.parse::<Flags>()
.map_err(Into::into)
.map(|flags| (node, flags))
}) {
Ok((nodes, flags)) => (nodes, flags),
let flags = self.interner.resolve(flags);
let regex = match dash_regex::compile(full, flags) {
Ok(regex) => regex,
Err(err) => {
let tok = *self.previous().unwrap();
self.error(Error::RegexSyntaxError(tok, err));
Expand All @@ -890,7 +884,7 @@ impl Parser<'_, '_> {
};
Expr {
span: current.span,
kind: ExprKind::regex_literal(nodes, flags, literal),
kind: ExprKind::regex_literal(regex, literal),
}
}
other if other.is_identifier() => {
Expand Down
3 changes: 3 additions & 0 deletions crates/dash_regex/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ pub enum Error {
#[error("unexpected character: {}", *.0 as char)]
UnexpectedChar(u8),

#[error("number too large to fit in a u32")]
Overflow,

#[error("{0}")]
Flags(#[from] flags::Error),
}
3 changes: 1 addition & 2 deletions crates/dash_regex/src/flags.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
use std::str::FromStr;

use bitflags::bitflags;
use serde::{Deserialize, Serialize};
use thiserror::Error;

bitflags! {
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(feature = "format", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "format", derive(serde::Serialize, serde::Deserialize))]
pub struct Flags: u8 {
const GLOBAL = 1;
const IGNORE_CASE = 2;
Expand Down
164 changes: 164 additions & 0 deletions crates/dash_regex/src/graph/build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
use core::slice;
use std::collections::HashMap;

use crate::graph::node::{BuildGraph, CharacterClassItem, Node, NodeId, NodeKind};
use crate::node::{CharacterClassItem as ParsedCharacterClassItem, GroupCaptureMode};

use crate::node::Node as ParseNode;
use crate::parser::ParsedRegex;

use super::node::Graph;

/// Maps the address of a parsed group node to its capture group number.
///
/// Keys are raw node addresses rather than node values: `number_groups` inserts the address of
/// each numbered `ParseNode::Group`, and `build` looks groups up by address again.
// NOTE(review): this presumably requires the same, unmodified `ParsedRegex` to be passed to both
// `number_groups` and `build` so the addresses still line up — confirm against the callers.
type CaptureGroupMap = HashMap<*const ParseNode, u32>;

pub fn number_groups(regex: &ParsedRegex) -> CaptureGroupMap {
fn inner(map: &mut CaptureGroupMap, _counter: &mut u32, nodes: &[ParseNode]) {
if let Some((node, rest)) = nodes.split_first() {
match node {
ParseNode::Group(id, nodes) => {
if let GroupCaptureMode::Id(id) = *id {
map.insert(node, id.try_into().unwrap());
}

inner(map, _counter, nodes);
}
ParseNode::Optional(node) => inner(map, _counter, slice::from_ref(&**node)),
ParseNode::Or(left, right) => {
inner(map, _counter, left);
inner(map, _counter, right);
}
ParseNode::Repetition { node, .. } => inner(map, _counter, slice::from_ref(&**node)),
ParseNode::AnyCharacter
| ParseNode::MetaSequence(_)
| ParseNode::LiteralCharacter(_)
| ParseNode::CharacterClass(_)
| ParseNode::Anchor(_) => {} // cannot contain group nodes
}

inner(map, _counter, rest);
}
}

let mut map = HashMap::new();
let counter = &mut 0;
inner(&mut map, counter, &regex.nodes);
map
}

pub fn build(group_numbers: &CaptureGroupMap, regex: &ParsedRegex) -> (Graph, NodeId) {
fn lower_repetition(
graph: &mut BuildGraph,
group_numbers: &CaptureGroupMap,
node: &ParseNode,
min: u32,
max: Option<u32>,
next: NodeId,
) -> NodeId {
let end_id = graph.push(Node {
next: Some(next),
kind: NodeKind::RepetitionEnd {
start: NodeId::DUMMY, // will be set later
},
});
let inner_id = inner(graph, group_numbers, slice::from_ref(node), end_id);
let start_id = graph.push(Node {
next: Some(next),
kind: NodeKind::RepetitionStart {
min,
max,
inner: inner_id,
},
});
let NodeKind::RepetitionEnd { start } = &mut graph[end_id].kind else {
unreachable!()
};
*start = start_id;
start_id
}

fn inner(
graph: &mut BuildGraph,
group_numbers: &CaptureGroupMap,
nodes: &[ParseNode],
outer_next: NodeId,
) -> NodeId {
if let Some((current, rest)) = nodes.split_first() {
let next = inner(graph, group_numbers, rest, outer_next);
match *current {
ParseNode::AnyCharacter => graph.push(Node {
next: Some(next),
kind: NodeKind::AnyCharacter,
}),
ParseNode::MetaSequence(meta) => graph.push(Node {
next: Some(next),
kind: NodeKind::Meta(meta),
}),
ParseNode::Repetition { ref node, min, max } => {
lower_repetition(graph, group_numbers, node, min, max, next)
}
ParseNode::LiteralCharacter(literal) => graph.push(Node {
next: Some(next),
kind: NodeKind::Literal(literal),
}),
ParseNode::CharacterClass(ref parse_items) => {
let items = parse_items
.iter()
.map(|item| match *item {
ParsedCharacterClassItem::Node(ParseNode::AnyCharacter) => CharacterClassItem::AnyCharacter,
ParsedCharacterClassItem::Node(ParseNode::LiteralCharacter(literal)) => {
CharacterClassItem::Literal(literal)
}
ParsedCharacterClassItem::Node(ParseNode::MetaSequence(meta)) => {
CharacterClassItem::Meta(meta)
}
ParsedCharacterClassItem::Node(ref node) => {
panic!("cannot lower {node:?} in character class")
}
ParsedCharacterClassItem::Range(from, to) => CharacterClassItem::Range(from, to),
})
.collect::<Box<[_]>>();

graph.push(Node {
next: Some(next),
kind: NodeKind::CharacterClass(items),
})
}
ParseNode::Anchor(anchor) => graph.push(Node {
next: Some(next),
kind: NodeKind::Anchor(anchor),
}),
ParseNode::Or(ref left, ref right) => {
let left = inner(graph, group_numbers, left, next);
let right = inner(graph, group_numbers, right, next);
graph.push(Node {
next: Some(next),
kind: NodeKind::Or(left, right),
})
}
ParseNode::Optional(ref node) => lower_repetition(graph, group_numbers, node, 0, Some(1), next),
ParseNode::Group(_, ref nodes) => {
let group_id = group_numbers.get(&(current as *const ParseNode)).copied();
let end = graph.push(Node {
next: Some(next),
kind: NodeKind::GroupEnd { group_id },
});
let inner_id = inner(graph, group_numbers, nodes, end);
graph.push(Node {
next: Some(inner_id),
kind: NodeKind::GroupStart { group_id },
})
}
}
} else {
outer_next
}
}

let mut graph = BuildGraph::new();
let end = graph.push(Node {
kind: NodeKind::End,
next: None,
});
let root = inner(&mut graph, group_numbers, &regex.nodes, end);
(graph.finalize(), root)
}
Loading