From 620ec97f623f613984609d954b4d76c9ed487f1e Mon Sep 17 00:00:00 2001
From: Dmitry Sharabin <dmitrysharabin@gmail.com>
Date: Tue, 18 Nov 2025 23:14:00 +0100
Subject: [PATCH 1/7] First stab at functional `$inside`

---
 src/core/tokenize/match.js                    | 75 +++++++++++++++---
 src/core/tokenize/util.js                     | 46 ++++++++++-
 src/languages/markdown.js                     | 79 ++++---------------
 src/shared/util.js                            | 10 +++
 .../markdown/code-block_feature.html.test     |  4 +-
 ...block_language_detection_feature.html.test |  8 +-
 tests/languages/markdown/code_feature.test    |  2 +-
 7 files changed, 138 insertions(+), 86 deletions(-)

diff --git a/src/core/tokenize/match.js b/src/core/tokenize/match.js
index a151c36642..6a4763e40c 100644
--- a/src/core/tokenize/match.js
+++ b/src/core/tokenize/match.js
@@ -1,7 +1,7 @@
 import { Token } from '../classes/token.js';
 import singleton from '../prism.js';
 import { tokenize } from './tokenize.js';
-import { resolve } from './util.js';
+import { resolve, tokenizeByNamedGroups } from './util.js';
 
 /**
  * @this {Prism}
@@ -21,7 +21,12 @@ export function _matchGrammar (text, tokenList, grammar, startNode, startPos, re
 
 	for (const token in grammar) {
 		const tokenValue = grammar[token];
-		if (!grammar.hasOwnProperty(token) || token.startsWith('$') || !tokenValue) {
+		if (
+			!grammar.hasOwnProperty(token) ||
+			token.startsWith('$') ||
+			!tokenValue ||
+			typeof tokenValue === 'function'
+		) {
 			continue;
 		}
 
@@ -36,9 +41,20 @@ export function _matchGrammar (text, tokenList, grammar, startNode, startPos, re
 			let { pattern, lookbehind = false, greedy = false, alias, inside } = patternObj;
 			const insideGrammar = resolve.call(prism, inside);
 
+			let flagsToAdd = '';
+
 			if (greedy && !pattern.global) {
 				// Without the global flag, lastIndex won't work
-				patternObj.pattern = pattern = RegExp(pattern.source, pattern.flags + 'g');
+				flagsToAdd += 'g';
+			}
+
+			if (pattern.source?.includes('(?<') && pattern.hasIndices === false) {
+				// May have named groups (`(?<` also matches lookbehind assertions); the `d` flag lets us read their indices
+				flagsToAdd += 'd';
+			}
+
+			if (flagsToAdd) {
+				patternObj.pattern = pattern = RegExp(pattern.source, pattern.flags + flagsToAdd);
 			}
 
 			for (
@@ -63,7 +79,8 @@ export function _matchGrammar (text, tokenList, grammar, startNode, startPos, re
 				}
 
 				let removeCount = 1; // this is the to parameter of removeBetween
-				let match;
+				/** @type {RegExpExecArray | null} */
+				let match = null;
 
 				if (greedy) {
 					match = matchPattern(pattern, pos, text, lookbehind);
@@ -117,6 +134,10 @@ export function _matchGrammar (text, tokenList, grammar, startNode, startPos, re
 
 				const from = match.index;
 				const matchStr = match[0];
+
+				/** @type {TokenStream | string} */
+				let content = matchStr;
+
 				const before = str.slice(0, from);
 				const after = str.slice(from + matchStr.length);
 
@@ -134,14 +155,42 @@ export function _matchGrammar (text, tokenList, grammar, startNode, startPos, re
 
 				tokenList.removeRange(removeFrom, removeCount);
 
-				const wrapped = new Token(
-					token,
-					insideGrammar
-						? tokenize.call(prism, matchStr, /** @type {Grammar} */ (insideGrammar))
-						: matchStr,
-					alias,
-					matchStr
-				);
+				const byGroups = match.groups ? tokenizeByNamedGroups(match) : null;
+				if (byGroups && byGroups.length > 1) {
+					content = byGroups
+						.map(arg => {
+							let content = typeof arg === 'string' ? arg : arg.content;
+							const type = typeof arg === 'string' ? undefined : arg.type;
+
+							if (insideGrammar) {
+								let localInsideGrammar = type ? insideGrammar[type] : insideGrammar;
+
+								if (typeof localInsideGrammar === 'function') {
+									// Late resolving
+									localInsideGrammar = resolve.call(
+										prism,
+										localInsideGrammar(match.groups)
+									);
+								}
+
+								if (localInsideGrammar) {
+									// @ts-ignore
+									content = tokenize.call(prism, content, localInsideGrammar);
+								}
+							}
+
+							return typeof arg === 'object' && arg.type
+								? new Token(arg.type, content)
+								: content;
+						})
+						.flat(); // Flatten tokens like ['foo']
+				}
+				else if (insideGrammar) {
+					// @ts-ignore
+					content = tokenize.call(prism, content, insideGrammar);
+				}
+
+				const wrapped = new Token(token, content, alias, matchStr);
 				currentNode = tokenList.addAfter(removeFrom, wrapped);
 
 				if (after) {
@@ -216,7 +265,7 @@ function toGrammarToken (pattern) {
 
 /**
  * @import { Prism } from '../prism.js';
- * @import { Grammar, GrammarToken, GrammarTokens, RegExpLike } from '../../types.d.ts';
+ * @import { Grammar, GrammarToken, GrammarTokens, TokenStream, RegExpLike } from '../../types.d.ts';
  */
 
 /**
diff --git a/src/core/tokenize/util.js b/src/core/tokenize/util.js
index d4b0ce94dd..a7573a9d0b 100644
--- a/src/core/tokenize/util.js
+++ b/src/core/tokenize/util.js
@@ -1,9 +1,10 @@
+import { camelToKebabCase } from '../../shared/util.js';
 import singleton from '../prism.js';
 
 /**
  * @this {Prism}
- * @param {Grammar | string | null | undefined} reference
- * @returns {Grammar | undefined}
+ * @param {Grammar | string | Function | null | undefined} reference
+ * @returns {Grammar | Function | undefined}
  */
 export function resolve (reference) {
 	const prism = this ?? singleton;
@@ -13,6 +14,11 @@ export function resolve (reference) {
 		ret = prism.languageRegistry.getLanguage(ret)?.resolvedGrammar;
 	}
 
+	if (typeof ret === 'function' && ret.length === 0) {
+		// Function with no arguments, resolve eagerly
+		ret = ret.call(prism);
+	}
+
 	if (typeof ret === 'object' && ret.$rest) {
 		const restGrammar = resolve.call(prism, ret.$rest) ?? {};
 		if (typeof restGrammar === 'object') {
@@ -25,6 +31,42 @@ export function resolve (reference) {
 	return /** @type {Grammar | undefined} */ (ret);
 }
 
+/**
+ * Splits a match into plain-string gaps and `{ type, content }` parts, one per participating named group (kebab-cased type).
+ * @param {RegExpExecArray} match
+ * @returns {({type: string, content: string} | string)[]}
+ */
+export function tokenizeByNamedGroups (match) {
+	const str = match[0];
+	const result = [];
+	let i = 0;
+
+	const entries = Object.entries(match.indices?.groups || {})
+		// Groups that did not participate in the match have `undefined` indices; skip them instead of throwing
+		.filter(([, range]) => Array.isArray(range))
+		.map(([type, [start, end]]) => ({
+			type: camelToKebabCase(type),
+			start: start - match.index,
+			end: end - match.index,
+		}))
+		.sort((a, b) => a.start - b.start);
+
+	for (const { type, start, end } of entries) {
+		if (start > i) {
+			result.push(str.slice(i, start));
+		}
+
+		result.push({ type, content: str.slice(start, end) });
+		i = end;
+	}
+
+	if (i < str.length) {
+		result.push(str.slice(i));
+	}
+
+	return result;
+}
+
 /**
  * @import { Prism } from '../prism.js';
  * @import { Grammar, LanguageRegistry } from '../../types.d.ts';
diff --git a/src/languages/markdown.js b/src/languages/markdown.js
index 3b09d1e0e9..846c14b97b 100644
--- a/src/languages/markdown.js
+++ b/src/languages/markdown.js
@@ -99,73 +99,24 @@ export default {
 					// ```optional language
 					// code block
 					// ```
-					pattern: /^```[\s\S]*?^```$/m,
-					greedy: true,
-					inside: /** @type {Grammar} */ ({
-						'code-block': {
-							pattern: /^(```.*(?:\n|\r\n?))[\s\S]+?(?=(?:\n|\r\n?)^```$)/m,
-							lookbehind: true,
-						},
-						'code-language': {
-							pattern: /^(```).+/,
-							lookbehind: true,
-						},
-						'punctuation': /```/,
-						/** @type {Grammar['$tokenize']} */
-						$tokenize (code, grammar, Prism) {
-							const tokens = Prism.tokenize(code, withoutTokenize(grammar));
-
-							/*
-							 * Add the correct `language-xxxx` class to this code block. Keep in mind that the `code-language` token
-							 * is optional. But the grammar is defined so that there is only one case we have to handle:
-							 *
-							 * token.content = [
-							 *     <span class="punctuation">```</span>,
-							 *     <span class="code-language">xxxx</span>,
-							 *     '\n', // exactly one new lines (\r or \n or \r\n)
-							 *     <span class="code-block">...</span>,
-							 *     '\n', // exactly one new lines again
-							 *     <span class="punctuation">```</span>
-							 * ];
-							 */
-
-							const codeLang = tokens[1];
-							const codeBlock = tokens[3];
-
-							if (
-								typeof codeLang === 'object' &&
-								typeof codeBlock === 'object' &&
-								codeLang.type === 'code-language' &&
-								codeBlock.type === 'code-block'
-							) {
-								// this might be a language that Prism does not support
-
-								// do some replacements to support C++, C#, and F#
-								const lang = getTextContent(codeLang.content)
-									.replace(/\b#/g, 'sharp')
-									.replace(/\b\+\+/g, 'pp');
-								// only use the first word
-								const langName = /[a-z][\w-]*/i.exec(lang)?.[0].toLowerCase();
-								if (langName) {
-									codeBlock.addAlias('language-' + langName);
-
-									const grammar =
-										Prism.languageRegistry.getLanguage(lang)?.resolvedGrammar;
-									if (grammar) {
-										codeBlock.content = Prism.tokenize(
-											getTextContent(codeBlock),
-											grammar
-										);
-									}
-									else {
-										codeBlock.addAlias('needs-highlighting');
-									}
+					pattern:
+						/^```(?:\s*)(?<codeLanguage>\{[^{}]*\}|[a-z+#-]+)(?:\n|\r\n?)(?<codeBlock>[\s\S]*?)(?:\n|\r\n?)```$/im,
+					inside: {
+						'code-block': groups => {
+							let lang = groups.codeLanguage;
+							// Extract language code from curly braces like {r pressure, echo=FALSE} → r
+							if (lang.startsWith('{') && lang.endsWith('}')) {
+								const match = lang.slice(1, -1).match(/^(?:\s*)([a-z+#-]+)/i);
+								if (match) {
+									lang = match[0];
 								}
 							}
-
-							return tokens;
+							// Apply transformations: c++ → cpp, c# → csharp, f# → fsharp, etc.
+							lang = lang.replace(/\b#/g, 'sharp').replace(/\b\+\+/g, 'pp');
+							return lang.toLowerCase();
 						},
-					}),
+						'punctuation': /```/,
+					},
 				},
 			],
 			'title': [
diff --git a/src/shared/util.js b/src/shared/util.js
index 04cb5b9f48..77b9a9e09a 100644
--- a/src/shared/util.js
+++ b/src/shared/util.js
@@ -76,3 +76,13 @@ export function kebabToCamelCase (kebab) {
 	const [first, ...others] = kebab.split(/-/);
 	return first + others.map(capitalize).join('');
 }
+
+/**
+ * Converts the given camel case identifier to a kebab case identifier.
+ *
+ * @param {string} str
+ * @returns {string}
+ */
+export function camelToKebabCase (str) {
+	return (str + '').replace(/[A-Z]/g, l => '-' + l.toLowerCase());
+}
diff --git a/tests/languages/markdown/code-block_feature.html.test b/tests/languages/markdown/code-block_feature.html.test
index 672c4cbca2..f443de39a7 100644
--- a/tests/languages/markdown/code-block_feature.html.test
+++ b/tests/languages/markdown/code-block_feature.html.test
@@ -11,7 +11,7 @@
 <span class="token code">
 	<span class="token punctuation">```</span>
 	<span class="token code-language">html</span>
-	<span class="token code-block language-html">
+	<span class="token code-block">
 		<span class="token tag">
 			<span class="token punctuation">&lt;</span>
 			<span class="token tag">a</span>
@@ -38,7 +38,7 @@
 <span class="token code">
 	<span class="token punctuation">```</span>
 	<span class="token code-language">unknownLanguage</span>
-	<span class="token code-block language-unknownlanguage needs-highlighting">
+	<span class="token code-block">
 		&lt;a href="#foo">Click me!&lt;/a> &amp;amp;
 	</span>
 	<span class="token punctuation">```</span>
diff --git a/tests/languages/markdown/code_block_language_detection_feature.html.test b/tests/languages/markdown/code_block_language_detection_feature.html.test
index 6ccdc0b56e..95b8949427 100644
--- a/tests/languages/markdown/code_block_language_detection_feature.html.test
+++ b/tests/languages/markdown/code_block_language_detection_feature.html.test
@@ -19,27 +19,27 @@ plot(pressure)
 <span class="token code">
 	<span class="token punctuation">```</span>
 	<span class="token code-language">js</span>
-	<span class="token code-block language-js needs-highlighting">let a = 0;</span>
+	<span class="token code-block">let a = 0;</span>
 	<span class="token punctuation">```</span>
 </span>
 
 <span class="token code">
 	<span class="token punctuation">```</span>
 	<span class="token code-language">c++</span>
-	<span class="token code-block language-cpp needs-highlighting">int a = 0;</span>
+	<span class="token code-block">int a = 0;</span>
 	<span class="token punctuation">```</span>
 </span>
 
 <span class="token code">
 	<span class="token punctuation">```</span>
 	<span class="token code-language">c#</span>
-	<span class="token code-block language-csharp needs-highlighting">var a = 0;</span>
+	<span class="token code-block">var a = 0;</span>
 	<span class="token punctuation">```</span>
 </span>
 
 <span class="token code">
 	<span class="token punctuation">```</span>
 	<span class="token code-language">{r pressure, echo=FALSE}</span>
-	<span class="token code-block language-r needs-highlighting">plot(pressure)</span>
+	<span class="token code-block">plot(pressure)</span>
 	<span class="token punctuation">```</span>
 </span>
diff --git a/tests/languages/markdown/code_feature.test b/tests/languages/markdown/code_feature.test
index b2e5a77f83..dda5b33710 100644
--- a/tests/languages/markdown/code_feature.test
+++ b/tests/languages/markdown/code_feature.test
@@ -21,7 +21,7 @@ var a = 0;
 	["code", "\tfoobar\r\n\tcontinuous"],
 
 	["code", [
-		["punctuation", "```"], ["code-language", " js"],
+		["punctuation", "```"], ["code-language", "js"],
 		["code-block", "var a = 0;"],
 		["punctuation", "```"]
 	]]

From ca35296026adfa700ec7916e195e8a4235fb4b08 Mon Sep 17 00:00:00 2001
From: Dmitry Sharabin <dmitrysharabin@gmail.com>
Date: Tue, 18 Nov 2025 19:18:13 +0100
Subject: [PATCH 2/7] [pattern tests] Don't treat named capturing groups as
 unused

---
 tests/pattern-tests.js | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/pattern-tests.js b/tests/pattern-tests.js
index 5c9e0cd3d0..6b98e079aa 100644
--- a/tests/pattern-tests.js
+++ b/tests/pattern-tests.js
@@ -269,7 +269,8 @@ function testPatterns (getPrism, mainLanguage) {
 		await forEachPattern(({ ast, tokenPath, lookbehindGroup, reportError }) => {
 			forEachCapturingGroup(ast.pattern, ({ group, number }) => {
 				const isLookbehindGroup = group === lookbehindGroup;
-				if (group.references.length === 0 && !isLookbehindGroup) {
+				const isNamedGroup = !!group.name; // named capturing groups are used for tokenization, so they are not unused
+				if (group.references.length === 0 && !isLookbehindGroup && !isNamedGroup) {
 					const fixes = [];
 					fixes.push(
 						`Make this group a non-capturing group ('(?:...)' instead of '(...)'). (It's usually this option.)`

From 75a6d37613860b62e2666eaac7d86a44b81cfd14 Mon Sep 17 00:00:00 2001
From: Dmitry Sharabin <dmitrysharabin@gmail.com>
Date: Wed, 19 Nov 2025 15:01:25 +0100
Subject: [PATCH 3/7] [regex-coverage test] Track patterns when Prism creates
 new `RegExp` objects

The regex coverage test was failing to track pattern matches because Prism creates new `RegExp` objects when adding flags (e.g., `g` or `d`) during tokenization. The original approach intercepted `exec()` on individual regex objects, which missed matches on the newly created `RegExp` instances.

Changes:
- Replace `String(regex)` key with normalized source+flags key to match patterns even when Prism creates new `RegExp` objects with different flags
- Switch from per-regex interception to global `RegExp.prototype.exec` interception to catch all pattern matches, including on new `RegExp` objects
- Use a simple loop instead of `String.replace` in flag normalization to avoid triggering our own `RegExp.exec` interception (which caused infinite loops)

This fixes the tracking issue for patterns like the markdown code block pattern that were previously reported as untested despite being used in tests.
---
 tests/coverage.js | 53 ++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 43 insertions(+), 10 deletions(-)

diff --git a/tests/coverage.js b/tests/coverage.js
index fa73f562fe..d3a1eedf64 100644
--- a/tests/coverage.js
+++ b/tests/coverage.js
@@ -9,6 +9,28 @@ describe('Pattern test coverage', () => {
 	/** @type {Map<string, PatternData>} */
 	const patterns = new Map();
 
+	/**
+	 * Creates a key for pattern lookup based on source and normalized flags.
+	 * Normalizes flags by removing `g` and `d` (which Prism may add) and sorting the rest.
+	 * Uses a simple loop instead of `String.replace` to avoid triggering our `RegExp.exec` interception.
+	 *
+	 * @param {string} source
+	 * @param {string} flags
+	 * @returns {string}
+	 */
+	function getSourceKey (source, flags) {
+		// Normalize flags: remove 'g' and 'd', then sort
+		let normalizedFlags = '';
+		for (let i = 0; i < flags.length; i++) {
+			const flag = flags[i];
+			if (flag !== 'g' && flag !== 'd') {
+				normalizedFlags += flag;
+			}
+		}
+		normalizedFlags = normalizedFlags.split('').sort().join('');
+		return `${source}|${normalizedFlags}`;
+	}
+
 	/**
 	 * @param {string | string[]} languages
 	 * @returns {Promise<Prism>}
@@ -31,7 +53,9 @@ describe('Pattern test coverage', () => {
 				const regex = makeGlobal(value);
 				object[key] = regex;
 
-				const patternKey = String(regex);
+				// Register with the original regex's source and flags (before making global)
+				// This matches what Prism will use when creating new RegExp objects
+				const patternKey = getSourceKey(value.source, value.flags);
 				let data = patterns.get(patternKey);
 				if (!data) {
 					data = {
@@ -43,21 +67,30 @@ describe('Pattern test coverage', () => {
 					patterns.set(patternKey, data);
 				}
 				data.from.add(tokenPath);
-				const { matches } = data;
-
-				regex.exec = string => {
-					const match = RegExp.prototype.exec.call(regex, string);
-					if (match) {
-						matches.push(match);
-					}
-					return match;
-				};
 			}
 		});
 
 		return Prism;
 	}
 
+	// Intercept RegExp.prototype.exec globally to track all pattern matches.
+	// We use global interception (instead of per-regex interception) because Prism creates new RegExp
+	// objects when adding flags (see src/core/tokenize/match.js). Per-regex interception
+	// would only catch the original regex objects, missing matches on the new ones.
+	// This is safe because we only track patterns that exist in our map.
+	const originalExec = RegExp.prototype.exec;
+	RegExp.prototype.exec = function (string) {
+		const match = originalExec.call(this, string);
+		if (match) {
+			const patternKey = getSourceKey(this.source, this.flags);
+			const data = patterns.get(patternKey);
+			if (data) {
+				data.matches.push(match);
+			}
+		}
+		return match;
+	};
+
 	describe('Register all patterns', () => {
 		it('all', async function () {
 			this.slow(10 * 1000);

From 8752c280849925b087794d8ce771304ae5b1a3f3 Mon Sep 17 00:00:00 2001
From: Dmitry Sharabin <dmitrysharabin@gmail.com>
Date: Wed, 19 Nov 2025 15:05:56 +0100
Subject: [PATCH 4/7] [types] Adjust types and remove redundant imports

---
 src/shared/languages/templating.js | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/shared/languages/templating.js b/src/shared/languages/templating.js
index 1410649070..6c47b7399d 100644
--- a/src/shared/languages/templating.js
+++ b/src/shared/languages/templating.js
@@ -127,9 +127,9 @@ export function templating (code, hostGrammar, templateGrammar, Prism) {
 	hostGrammar = resolve.call(Prism, hostGrammar);
 	templateGrammar = resolve.call(Prism, templateGrammar);
 
-	const { hostCode, tokenStack } = buildPlaceholders(code, templateGrammar, Prism);
+	const { hostCode, tokenStack } = buildPlaceholders(code, /** @type {Grammar | undefined} */ (templateGrammar), Prism);
 
-	const tokens = hostGrammar ? Prism.tokenize(hostCode, hostGrammar) : [hostCode];
+	const tokens = hostGrammar ? Prism.tokenize(hostCode, /** @type {Grammar} */ (hostGrammar)) : [hostCode];
 	insertIntoHostToken(tokens, tokenStack);
 	return tokens;
 }
@@ -145,10 +145,10 @@ export function embeddedIn (hostGrammar) {
 }
 
 /**
- * @import { Prism, Token } from '../../core.js';
- * @import { TokenStream, TokenStack, Grammar, LanguageRegistry} from '../../types.d.ts';
+ * @import { Prism } from '../../core.js';
+ * @import { TokenStream, TokenStack, Grammar } from '../../types.d.ts';
  */
 
 /**
- * @typedef {Grammar | string | undefined | null} GrammarRef
+ * @typedef {Grammar | Function | string | undefined | null} GrammarRef
  */

From 8cb13942020df5dc9f8263cccbd38f7468337b06 Mon Sep 17 00:00:00 2001
From: Dmitry Sharabin <dmitrysharabin@gmail.com>
Date: Thu, 20 Nov 2025 14:58:17 +0100
Subject: [PATCH 5/7] [markdown] Improve language detection

Plus, add tests.
---
 src/languages/markdown.js                            |  2 +-
 .../code_block_language_detection_feature.html.test  | 12 ++++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/languages/markdown.js b/src/languages/markdown.js
index 846c14b97b..e502198d1e 100644
--- a/src/languages/markdown.js
+++ b/src/languages/markdown.js
@@ -100,7 +100,7 @@ export default {
 					// code block
 					// ```
 					pattern:
-						/^```(?:\s*)(?<codeLanguage>\{[^{}]*\}|[a-z+#-]+)(?:\n|\r\n?)(?<codeBlock>[\s\S]*?)(?:\n|\r\n?)```$/im,
+						/^```(?:\s*)(?<codeLanguage>\{[^{}]*\}|[a-z+#-]+)(?:[ \t][^\n\r]*)?(?:\n|\r\n?)(?<codeBlock>[\s\S]*?)(?:\n|\r\n?)```$/im,
 					inside: {
 						'code-block': groups => {
 							let lang = groups.codeLanguage;
diff --git a/tests/languages/markdown/code_block_language_detection_feature.html.test b/tests/languages/markdown/code_block_language_detection_feature.html.test
index 95b8949427..0d1460c6b3 100644
--- a/tests/languages/markdown/code_block_language_detection_feature.html.test
+++ b/tests/languages/markdown/code_block_language_detection_feature.html.test
@@ -14,6 +14,10 @@ var a = 0;
 plot(pressure)
 ```
 
+```js { data-copy="Copy the JavaScript snippet!" }
+let bar = 42;
+```
+
 ----------------------------------------------------
 
 <span class="token code">
@@ -43,3 +47,11 @@ plot(pressure)
 	<span class="token code-block">plot(pressure)</span>
 	<span class="token punctuation">```</span>
 </span>
+
+<span class="token code">
+	<span class="token punctuation">```</span>
+	<span class="token code-language">js</span>
+	{ data-copy="Copy the JavaScript snippet!" }
+	<span class="token code-block">let bar = 42;</span>
+	<span class="token punctuation">```</span>
+</span>
\ No newline at end of file

From 455b5bcfb75828eccaf1b67bde68c8cfdcf90e86 Mon Sep 17 00:00:00 2001
From: Dmitry Sharabin <dmitrysharabin@gmail.com>
Date: Thu, 20 Nov 2025 15:39:14 +0100
Subject: [PATCH 6/7] [markdown] Remove redundant capturing groups

---
 src/languages/markdown.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/languages/markdown.js b/src/languages/markdown.js
index e502198d1e..6ccf0d8b2f 100644
--- a/src/languages/markdown.js
+++ b/src/languages/markdown.js
@@ -100,13 +100,13 @@ export default {
 					// code block
 					// ```
 					pattern:
-						/^```(?:\s*)(?<codeLanguage>\{[^{}]*\}|[a-z+#-]+)(?:[ \t][^\n\r]*)?(?:\n|\r\n?)(?<codeBlock>[\s\S]*?)(?:\n|\r\n?)```$/im,
+						/^```\s*(?<codeLanguage>\{[^{}]*\}|[a-z+#-]+)(?:[ \t][^\n\r]*)?(?:\n|\r\n?)(?<codeBlock>[\s\S]*?)(?:\n|\r\n?)```$/im,
 					inside: {
 						'code-block': groups => {
 							let lang = groups.codeLanguage;
 							// Extract language code from curly braces like {r pressure, echo=FALSE} → r
 							if (lang.startsWith('{') && lang.endsWith('}')) {
-								const match = lang.slice(1, -1).match(/^(?:\s*)([a-z+#-]+)/i);
+								const match = lang.slice(1, -1).trimStart().match(/^[a-z+#-]+/i);
 								if (match) {
 									lang = match[0];
 								}

From ad8510285c5a32467389e710105ea1486bfdef14 Mon Sep 17 00:00:00 2001
From: Dmitry Sharabin <dmitrysharabin@gmail.com>
Date: Wed, 3 Dec 2025 18:17:19 +0100
Subject: [PATCH 7/7] Address @LeaVerou's feedback: Add comment

---
 src/core/tokenize/match.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/core/tokenize/match.js b/src/core/tokenize/match.js
index 6a4763e40c..99177fe0f2 100644
--- a/src/core/tokenize/match.js
+++ b/src/core/tokenize/match.js
@@ -25,7 +25,7 @@ export function _matchGrammar (text, tokenList, grammar, startNode, startPos, re
 			!grammar.hasOwnProperty(token) ||
 			token.startsWith('$') ||
 			!tokenValue ||
-			typeof tokenValue === 'function'
+			typeof tokenValue === 'function' // functional tokens ($inside for now) are called lazily when tokenizing named groups further down in this function; skip them everywhere else
 		) {
 			continue;
 		}