diff --git a/dev-guide/src/grammar.md b/dev-guide/src/grammar.md
index 7a4cdea466..341c09522c 100644
--- a/dev-guide/src/grammar.md
+++ b/dev-guide/src/grammar.md
@@ -52,25 +52,22 @@ Footnote -> `[^` ~[`]` LF]+ `]`
Quantifier ->
Optional
| Repeat
- | RepeatNonGreedy
| RepeatPlus
- | RepeatPlusNonGreedy
| RepeatRange
| RepeatRangeInclusive
+ | RepeatRangeNamed
Optional -> `?`
Repeat -> `*`
-RepeatNonGreedy -> `*?`
-
RepeatPlus -> `+`
-RepeatPlusNonGreedy -> `+?`
+RepeatRange -> `{` ( Name `:` )? Range? `..` Range? `}`
-RepeatRange -> `{` Range? `..` Range? `}`
+RepeatRangeInclusive -> `{` ( Name `:` )? Range? `..=` Range `}`
-RepeatRangeInclusive -> `{` Range? `..=` Range `}`
+RepeatRangeNamed -> `{` Name `}`
Range -> [0-9]+
@@ -145,11 +142,11 @@ The general format is a series of productions separated by blank lines. The expr
| Optional | Expr? | The preceding expression is optional. |
| NegativeLookahead | !Expr | Matches if Expr does not follow, without consuming any input. |
| Repeat | Expr* | The preceding expression is repeated 0 or more times. |
-| RepeatNonGreedy | Expr*? | The preceding expression is repeated 0 or more times without being greedy. |
| RepeatPlus | Expr+ | The preceding expression is repeated 1 or more times. |
-| RepeatPlusNonGreedy | Expr+? | The preceding expression is repeated 1 or more times without being greedy. |
| RepeatRange | Expr{2..4} | The preceding expression is repeated between the range of times specified. Either bound can be excluded, which works just like Rust ranges. |
| RepeatRangeInclusive | Expr{2..=4} | The preceding expression is repeated between the inclusive range of times specified. The lower bound can be omitted. |
+| Named RepeatRangeInclusive | Expr{name:2..=4} | If a name precedes the range, then the number of repetitions is stored in a variable with that name that subsequent RepeatRangeNamed expressions can refer to. |
+| RepeatRangeNamed | Expr{name} | The preceding expression is repeated the number of times stored by the previously named repetition. |
## Automatic linking
diff --git a/src/notation.md b/src/notation.md
index b74c74b22f..ce3eee2ef8 100644
--- a/src/notation.md
+++ b/src/notation.md
@@ -18,6 +18,8 @@ The following notations are used by the *Lexer* and *Syntax* grammar snippets:
| x+ | _MacroMatch_+ | 1 or more of x |
| xa..b | HEX_DIGIT1..6 | a to b repetitions of x, exclusive of b |
| xa..=b | HEX_DIGIT1..=5 | a to b repetitions of x, inclusive of b |
+| xn:a..=b | `#`n:1..=255 | a labeled repetition that a subsequent repetition can refer to |
+| xn | `#`n | repeat the number of times from the previously labeled repetition |
| Rule1 Rule2 | `fn` _Name_ _Parameters_ | Sequence of rules in order |
| \| | `u8` \| `u16`, Block \| Item | Either one or another |
| ! | !COMMENT | Matches if the expression does not follow, without consuming any input |
diff --git a/src/tokens.md b/src/tokens.md
index d878eabfe2..0f0964bfce 100644
--- a/src/tokens.md
+++ b/src/tokens.md
@@ -214,11 +214,13 @@ r[lex.token.literal.str-raw]
r[lex.token.literal.str-raw.syntax]
```grammar,lexer
-RAW_STRING_LITERAL -> `r` RAW_STRING_CONTENT SUFFIX?
+RAW_STRING_LITERAL ->
+ `r` `"` ^ RAW_STRING_CONTENT `"` SUFFIX?
+ | `r` `#`{n:1..=255} ^ `"` RAW_STRING_CONTENT_HASHED `"` `#`{n} SUFFIX?
-RAW_STRING_CONTENT ->
- `"` ^ ( ~CR )*? `"`
- | `#` RAW_STRING_CONTENT `#`
+RAW_STRING_CONTENT -> (!`"` ~CR )*
+
+RAW_STRING_CONTENT_HASHED -> (!(`"` `#`{n}) ~CR )*
```
r[lex.token.literal.str-raw.intro]
@@ -301,11 +303,12 @@ r[lex.token.str-byte-raw]
r[lex.token.str-byte-raw.syntax]
```grammar,lexer
RAW_BYTE_STRING_LITERAL ->
- `br` RAW_BYTE_STRING_CONTENT SUFFIX?
+ `br` `"` ^ RAW_BYTE_STRING_CONTENT `"` SUFFIX?
+ | `br` `#`{n:1..=255} ^ `"` RAW_BYTE_STRING_CONTENT_HASHED `"` `#`{n} SUFFIX?
+
+RAW_BYTE_STRING_CONTENT -> (!`"` ASCII_FOR_RAW )*
-RAW_BYTE_STRING_CONTENT ->
- `"` ^ ASCII_FOR_RAW*? `"`
- | `#` RAW_BYTE_STRING_CONTENT `#`
+RAW_BYTE_STRING_CONTENT_HASHED -> (!(`"` `#`{n}) ASCII_FOR_RAW )*
ASCII_FOR_RAW -> !CR ASCII
```
@@ -395,11 +398,12 @@ r[lex.token.str-c-raw]
r[lex.token.str-c-raw.syntax]
```grammar,lexer
RAW_C_STRING_LITERAL ->
- `cr` RAW_C_STRING_CONTENT SUFFIX?
+ `cr` `"` ^ RAW_C_STRING_CONTENT `"` SUFFIX?
+ | `cr` `#`{n:1..=255} ^ `"` RAW_C_STRING_CONTENT_HASHED `"` `#`{n} SUFFIX?
+
+RAW_C_STRING_CONTENT -> (!`"` ~[CR NUL] )*
-RAW_C_STRING_CONTENT ->
- `"` ^ ( ~[CR NUL] )*? `"`
- | `#` RAW_C_STRING_CONTENT `#`
+RAW_C_STRING_CONTENT_HASHED -> (!(`"` `#`{n}) ~[CR NUL] )*
```
r[lex.token.str-c-raw.intro]
diff --git a/tools/grammar/src/lib.rs b/tools/grammar/src/lib.rs
index 1d64e45143..c16a07211c 100644
--- a/tools/grammar/src/lib.rs
+++ b/tools/grammar/src/lib.rs
@@ -55,19 +55,18 @@ pub enum ExpressionKind {
NegativeLookahead(Box),
/// `A*`
Repeat(Box),
- /// `A*?`
- RepeatNonGreedy(Box),
/// `A+`
RepeatPlus(Box),
- /// `A+?`
- RepeatPlusNonGreedy(Box),
- /// `A{2..4}` or `A{2..=4}`
+ /// `A{2..4}` or `A{2..=4}` or `A{name:2..=4}`
RepeatRange {
expr: Box,
+ name: Option,
min: Option,
max: Option,
limit: RangeLimit,
},
+ /// `A{name}`
+ RepeatRangeNamed(Box, String),
/// `NonTerminal`
Nt(String),
/// `` `string` ``
@@ -168,10 +167,9 @@ impl Expression {
| ExpressionKind::Optional(e)
| ExpressionKind::NegativeLookahead(e)
| ExpressionKind::Repeat(e)
- | ExpressionKind::RepeatNonGreedy(e)
| ExpressionKind::RepeatPlus(e)
- | ExpressionKind::RepeatPlusNonGreedy(e)
| ExpressionKind::RepeatRange { expr: e, .. }
+ | ExpressionKind::RepeatRangeNamed(e, _)
| ExpressionKind::NegExpression(e)
| ExpressionKind::Cut(e) => {
e.visit_nt(callback);
diff --git a/tools/grammar/src/parser.rs b/tools/grammar/src/parser.rs
index 0db6b478b5..a48674c201 100644
--- a/tools/grammar/src/parser.rs
+++ b/tools/grammar/src/parser.rs
@@ -439,29 +439,38 @@ impl Parser<'_> {
Ok(ExpressionKind::Optional(box_kind(kind)))
}
- /// Parse `*` | `*?` after expression.
+ /// Parse `*` after expression.
fn parse_repeat(&mut self, kind: ExpressionKind) -> Result {
self.expect("*", "expected `*`")?;
- Ok(if self.take_str("?") {
- ExpressionKind::RepeatNonGreedy(box_kind(kind))
- } else {
- ExpressionKind::Repeat(box_kind(kind))
- })
+ Ok(ExpressionKind::Repeat(box_kind(kind)))
}
- /// Parse `+` | `+?` after expression.
+ /// Parse `+` after expression.
fn parse_repeat_plus(&mut self, kind: ExpressionKind) -> Result {
self.expect("+", "expected `+`")?;
- Ok(if self.take_str("?") {
- ExpressionKind::RepeatPlusNonGreedy(box_kind(kind))
- } else {
- ExpressionKind::RepeatPlus(box_kind(kind))
- })
+ Ok(ExpressionKind::RepeatPlus(box_kind(kind)))
}
- /// Parse `{a..b}` | `{a..=b}` after expression.
+ /// Parse `{a..b}` | `{a..=b}` | `{name:a..=b}` | `{name}` after expression.
+ //
+ // `name:` before the range is a named binding. `{name}` refers to that binding.
fn parse_repeat_range(&mut self, kind: ExpressionKind) -> Result {
self.expect("{", "expected `{`")?;
+ let start = self.index;
+ let name = match (self.parse_name(), self.peek()) {
+ (Some(name), Some(b':')) => {
+ self.index += 1;
+ Some(name)
+ }
+ (Some(name), Some(b'}')) => {
+ self.index += 1;
+ return Ok(ExpressionKind::RepeatRangeNamed(box_kind(kind), name));
+ }
+ _ => {
+ self.index = start;
+ None
+ }
+ };
let min = self.take_while(&|x| x.is_ascii_digit());
let Ok(min) = (!min.is_empty()).then(|| min.parse::()).transpose() else {
bail!(self, "malformed range start");
@@ -492,6 +501,7 @@ impl Parser<'_> {
self.expect("}", "expected `}`")?;
Ok(ExpressionKind::RepeatRange {
expr: box_kind(kind),
+ name,
min,
max,
limit,
diff --git a/tools/mdbook-spec/src/grammar/render_markdown.rs b/tools/mdbook-spec/src/grammar/render_markdown.rs
index 316eb9aaf3..d79c949325 100644
--- a/tools/mdbook-spec/src/grammar/render_markdown.rs
+++ b/tools/mdbook-spec/src/grammar/render_markdown.rs
@@ -69,10 +69,9 @@ fn last_expr(expr: &Expression) -> &ExpressionKind {
| ExpressionKind::Optional(_)
| ExpressionKind::NegativeLookahead(_)
| ExpressionKind::Repeat(_)
- | ExpressionKind::RepeatNonGreedy(_)
| ExpressionKind::RepeatPlus(_)
- | ExpressionKind::RepeatPlusNonGreedy(_)
| ExpressionKind::RepeatRange { .. }
+ | ExpressionKind::RepeatRangeNamed(_, _)
| ExpressionKind::Nt(_)
| ExpressionKind::Terminal(_)
| ExpressionKind::Prose(_)
@@ -128,20 +127,13 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, output: &mut String) {
render_expression(e, cx, output);
output.push_str("\\*");
}
- ExpressionKind::RepeatNonGreedy(e) => {
- render_expression(e, cx, output);
- output.push_str("\\* (non-greedy)");
- }
ExpressionKind::RepeatPlus(e) => {
render_expression(e, cx, output);
output.push_str("+");
}
- ExpressionKind::RepeatPlusNonGreedy(e) => {
- render_expression(e, cx, output);
- output.push_str("+ (non-greedy)");
- }
ExpressionKind::RepeatRange {
expr,
+ name,
min,
max,
limit,
@@ -149,12 +141,17 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, output: &mut String) {
render_expression(expr, cx, output);
write!(
output,
- "{min}{limit}{max}",
+ "{name}{min}{limit}{max}",
+ name = name.as_ref().map(|n| format!("{n}:")).unwrap_or_default(),
min = min.map(|v| v.to_string()).unwrap_or_default(),
max = max.map(|v| v.to_string()).unwrap_or_default(),
)
.unwrap();
}
+ ExpressionKind::RepeatRangeNamed(e, name) => {
+ render_expression(e, cx, output);
+ write!(output, "{name}").unwrap();
+ }
ExpressionKind::Nt(nt) => {
let dest = cx.md_link_map.get(nt).map_or("missing", |d| d.as_str());
write!(output, "[{nt}]({dest})").unwrap();
diff --git a/tools/mdbook-spec/src/grammar/render_railroad.rs b/tools/mdbook-spec/src/grammar/render_railroad.rs
index ad7b291e57..aed4e8f151 100644
--- a/tools/mdbook-spec/src/grammar/render_railroad.rs
+++ b/tools/mdbook-spec/src/grammar/render_railroad.rs
@@ -81,6 +81,7 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, stack: bool) -> Option Option Option Option {
- let n = render_expression(e, cx, stack)?;
- let r = Box::new(Optional::new(Repeat::new(n, railroad::Empty)));
- let lbox = LabeledBox::new(r, Comment::new("non-greedy".to_string()));
- Box::new(lbox)
- }
// Treat `e+` and `e{1..}` equally.
ExpressionKind::RepeatPlus(e)
| ExpressionKind::RepeatRange {
expr: e,
+ name: _,
min: Some(1),
max: None,
limit: RangeLimit::HalfOpen,
@@ -188,12 +186,6 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, stack: bool) -> Option {
- let n = render_expression(e, cx, stack)?;
- let r = Repeat::new(n, railroad::Empty);
- let lbox = LabeledBox::new(r, Comment::new("non-greedy".to_string()));
- Box::new(lbox)
- }
// For `e{..=0}` / `e{0..=0}` or `e{..1}` / `e{0..1}` render an empty node.
ExpressionKind::RepeatRange { max: Some(0), .. }
| ExpressionKind::RepeatRange {
@@ -205,6 +197,7 @@ fn render_expression(expr: &Expression, cx: &RenderCtx, stack: bool) -> Option Option Option Option Option Option unreachable!("closed range must have upper bound"),
+ ExpressionKind::RepeatRangeNamed(e, name) => {
+ let n = render_expression(e, cx, stack)?;
+ let cmt = format!("repeat exactly {name} times");
+ let lbox = LabeledBox::new(n, Comment::new(cmt));
+ Box::new(lbox)
+ }
ExpressionKind::Nt(nt) => node_for_nt(cx, nt),
ExpressionKind::Terminal(t) => Box::new(Terminal::new(t.clone())),
ExpressionKind::Prose(s) => Box::new(Terminal::new(s.clone())),
@@ -405,6 +409,7 @@ mod tests {
fn range_expr(min: Option, max: Option, limit: RangeLimit) -> Expression {
Expression::new_kind(ExpressionKind::RepeatRange {
expr: Box::new(Expression::new_kind(ExpressionKind::Nt("e".to_string()))),
+ name: None,
min,
max,
limit,