From 96cfbccd0a3864216022b9a204c0b9a97927f1c2 Mon Sep 17 00:00:00 2001 From: MihailK Date: Mon, 3 Nov 2025 20:19:15 +0500 Subject: [PATCH 01/13] =?UTF-8?q?=D0=9F=D1=80=D0=B5=D0=B4=D0=B2=D0=B0?= =?UTF-8?q?=D1=80=D0=B8=D1=82=D0=B5=D0=BB=D1=8C=D0=BD=D1=8B=D0=B9=20=D0=B2?= =?UTF-8?q?=D0=B0=D1=80=D0=B8=D0=B0=D0=BD=D1=82=20Markdown?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/Block/Block.cs | 22 +++++++++++++ cs/Markdown/Block/BlockProcessor.cs | 14 ++++++++ cs/Markdown/LineNode/LineNode.cs | 28 ++++++++++++++++ cs/Markdown/Markdown.csproj | 10 ++++++ cs/Markdown/Markdown.csproj.DotSettings | 2 ++ cs/Markdown/Md.cs | 43 +++++++++++++++++++++++++ cs/Markdown/Parser.cs | 14 ++++++++ cs/Markdown/Program.cs | 1 + cs/Markdown/Rendering.cs | 14 ++++++++ cs/Markdown/Token/Token.cs | 10 ++++++ cs/Markdown/Token/TokenProcessor.cs | 14 ++++++++ cs/Markdown/Token/TokenType.cs | 8 +++++ cs/clean-code.sln | 6 ++++ cs/clean-code.sln.DotSettings | 3 ++ 14 files changed, 189 insertions(+) create mode 100644 cs/Markdown/Block/Block.cs create mode 100644 cs/Markdown/Block/BlockProcessor.cs create mode 100644 cs/Markdown/LineNode/LineNode.cs create mode 100644 cs/Markdown/Markdown.csproj create mode 100644 cs/Markdown/Markdown.csproj.DotSettings create mode 100644 cs/Markdown/Md.cs create mode 100644 cs/Markdown/Parser.cs create mode 100644 cs/Markdown/Program.cs create mode 100644 cs/Markdown/Rendering.cs create mode 100644 cs/Markdown/Token/Token.cs create mode 100644 cs/Markdown/Token/TokenProcessor.cs create mode 100644 cs/Markdown/Token/TokenType.cs diff --git a/cs/Markdown/Block/Block.cs b/cs/Markdown/Block/Block.cs new file mode 100644 index 000000000..a023927c5 --- /dev/null +++ b/cs/Markdown/Block/Block.cs @@ -0,0 +1,22 @@ +using System.Collections.Generic; + +namespace Markdown; + +public class Block +{ + public string Raw; +} + +public class ParagraphBlock : Block +{ + public ParagraphBlock(string raw) + { + } +} + +public class HeaderBlock : Block +{ + public HeaderBlock(string raw) + { + } +} \ No newline at end of file diff --git a/cs/Markdown/Block/BlockProcessor.cs b/cs/Markdown/Block/BlockProcessor.cs new file mode 100644 index 000000000..64694c880 --- /dev/null +++ b/cs/Markdown/Block/BlockProcessor.cs @@ -0,0 +1,14 @@ +namespace Markdown; + +public interface IBlockProcessor +{ + IEnumerable SplitToBlocks(string text); +} + +public class BlockProcessor : IBlockProcessor +{ + public IEnumerable SplitToBlocks(string text) + { + yield break; + } +} \ No newline at end of file diff --git a/cs/Markdown/LineNode/LineNode.cs b/cs/Markdown/LineNode/LineNode.cs new file mode 100644 index 000000000..3a1112d1c --- /dev/null +++ b/cs/Markdown/LineNode/LineNode.cs @@ -0,0 +1,28 @@ +using System.Collections.Generic; + +namespace Markdown; + +public class LineNode +{ +} + +public class TextNode : LineNode +{ + public string Text; +} + +public class EmphasisNode : LineNode +{ + public List Children; +} + +public class StrongNode : LineNode +{ + public List Children; +} + +public class LinkNode : LineNode +{ + public string Href; + public List Label; +} \ No newline at end of file diff --git a/cs/Markdown/Markdown.csproj b/cs/Markdown/Markdown.csproj new file mode 100644 index 000000000..85b49591f --- /dev/null +++ b/cs/Markdown/Markdown.csproj @@ -0,0 +1,10 @@ + + + + Exe + net9.0 + enable + enable + + + diff --git a/cs/Markdown/Markdown.csproj.DotSettings b/cs/Markdown/Markdown.csproj.DotSettings new file mode 100644 index 000000000..0f0cdde96 --- /dev/null +++ b/cs/Markdown/Markdown.csproj.DotSettings @@ -0,0 +1,2 @@ + + True \ No newline at end of file diff --git a/cs/Markdown/Md.cs b/cs/Markdown/Md.cs new file mode 100644 index 000000000..051336ead --- /dev/null +++ b/cs/Markdown/Md.cs @@ -0,0 +1,43 @@ +namespace Markdown; + +using System; +using System.Collections.Generic; +using System.Linq; + +public class Md +{ + private readonly BlockProcessor blockProcessor; + private readonly TokenProcessor tokenProcessor; + private readonly Parser parser; + private readonly HtmlRenderer htmlRenderer; + + public Md() + : this(new BlockProcessor(), new TokenProcessor(), new Parser(), new HtmlRenderer()) + { + } + + public Md(BlockProcessor blockProcessor, TokenProcessor tokenProcessor, Parser parser, + HtmlRenderer htmlRenderer) + { + this.blockProcessor = blockProcessor; + this.tokenProcessor = tokenProcessor; + this.parser = parser; + this.htmlRenderer = htmlRenderer; + } + + public string Render(string markdownText) + { + var blocks = blockProcessor.SplitToBlocks(markdownText); + var renderedBlocks = new List(); + foreach (var block in blocks) + { + var raw = block.Raw; + var tokens = tokenProcessor.Tokenize(raw); + var lineNodes = parser.Parse(tokens); + var html = htmlRenderer.Render(lineNodes); + renderedBlocks.Add(html); + } + + return string.Concat(renderedBlocks); + } +} \ No newline at end of file diff --git a/cs/Markdown/Parser.cs b/cs/Markdown/Parser.cs new file mode 100644 index 000000000..f469c7166 --- /dev/null +++ b/cs/Markdown/Parser.cs @@ -0,0 +1,14 @@ +namespace Markdown; + +public interface IParser +{ + IReadOnlyList Parse(IEnumerable tokens); +} + +public class Parser : IParser +{ + public IReadOnlyList Parse(IEnumerable tokens) + { + return System.Array.Empty(); + } +} \ No newline at end of file diff --git a/cs/Markdown/Program.cs b/cs/Markdown/Program.cs new file mode 100644 index 000000000..837131c21 --- /dev/null +++ b/cs/Markdown/Program.cs @@ -0,0 +1 @@ +Console.WriteLine("Hello, World!"); \ No newline at end of file diff --git a/cs/Markdown/Rendering.cs b/cs/Markdown/Rendering.cs new file mode 100644 index 000000000..2ecd3a490 --- /dev/null +++ b/cs/Markdown/Rendering.cs @@ -0,0 +1,14 @@ +namespace Markdown; + +public interface IRenderer +{ + string Render(IEnumerable lineNodes); +} + +public class HtmlRenderer : IRenderer +{ + public string Render(IEnumerable lineNodes) + { + return string.Empty; + } +} \ No newline at end of file diff --git a/cs/Markdown/Token/Token.cs b/cs/Markdown/Token/Token.cs new file mode 100644 index 000000000..5ead1d3fa --- /dev/null +++ b/cs/Markdown/Token/Token.cs @@ -0,0 +1,10 @@ +using System.Collections.Generic; + +namespace Markdown; + +public class Token +{ + public TokenType Type; + public string Info; + public int Pos; +} \ No newline at end of file diff --git a/cs/Markdown/Token/TokenProcessor.cs b/cs/Markdown/Token/TokenProcessor.cs new file mode 100644 index 000000000..278c4df9b --- /dev/null +++ b/cs/Markdown/Token/TokenProcessor.cs @@ -0,0 +1,14 @@ +namespace Markdown; + +public interface ITokenProcessor +{ + IEnumerable Tokenize(string input); +} + +public class TokenProcessor : ITokenProcessor +{ + public IEnumerable Tokenize(string input) + { + yield break; + } +} \ No newline at end of file diff --git a/cs/Markdown/Token/TokenType.cs b/cs/Markdown/Token/TokenType.cs new file mode 100644 index 000000000..74489db4a --- /dev/null +++ b/cs/Markdown/Token/TokenType.cs @@ -0,0 +1,8 @@ +namespace Markdown; + +public enum TokenType +{ + Text, + Underscore, + DoubleUnderscore, +} \ No newline at end of file diff --git a/cs/clean-code.sln b/cs/clean-code.sln index 2206d54db..93a958059 100644 --- a/cs/clean-code.sln +++ b/cs/clean-code.sln @@ -9,6 +9,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ControlDigit", "ControlDigi EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Samples", "Samples\Samples.csproj", "{C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Markdown", "Markdown\Markdown.csproj", "{3FAFF8B6-E113-410C-AB67-1A9614EE6BB4}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -27,5 +29,9 @@ Global {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Debug|Any CPU.Build.0 = Debug|Any CPU {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.ActiveCfg = Release|Any CPU {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.Build.0 = Release|Any CPU + {3FAFF8B6-E113-410C-AB67-1A9614EE6BB4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {3FAFF8B6-E113-410C-AB67-1A9614EE6BB4}.Debug|Any CPU.Build.0 = Debug|Any CPU + {3FAFF8B6-E113-410C-AB67-1A9614EE6BB4}.Release|Any CPU.ActiveCfg = Release|Any CPU + {3FAFF8B6-E113-410C-AB67-1A9614EE6BB4}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection EndGlobal diff --git a/cs/clean-code.sln.DotSettings b/cs/clean-code.sln.DotSettings index 135b83ecb..53fe49b2f 100644 --- a/cs/clean-code.sln.DotSettings +++ b/cs/clean-code.sln.DotSettings @@ -1,6 +1,9 @@  <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" /> <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb_AaBb" /> + <Policy><Descriptor Staticness="Instance" AccessRightKinds="Private" Description="Instance fields (private)"><ElementKinds><Kind Name="FIELD" /><Kind Name="READONLY_FIELD" /></ElementKinds></Descriptor><Policy Inspect="True" WarnAboutPrefixesAndSuffixes="False" Prefix="" Suffix="" Style="aaBb" /></Policy> + <Policy><Descriptor Staticness="Any" AccessRightKinds="Any" Description="Types and namespaces"><ElementKinds><Kind Name="NAMESPACE" /><Kind Name="CLASS" /><Kind Name="STRUCT" /><Kind Name="ENUM" /><Kind Name="DELEGATE" /></ElementKinds></Descriptor><Policy Inspect="True" WarnAboutPrefixesAndSuffixes="False" Prefix="" Suffix="" Style="AaBb_AaBb" /></Policy> + True True True Imported 10.10.2016 From 4b90615271c5408bdba4655cc3a34d9c3f1b6620 Mon Sep 17 00:00:00 2001 From: MihailK Date: Wed, 12 Nov 2025 13:29:54 +0500 Subject: [PATCH 02/13] =?UTF-8?q?=D0=A1=D0=B4=D0=B5=D0=BB=D0=B0=D0=BB=20?= =?UTF-8?q?=D1=82=D0=B5=D1=81=D1=82=D1=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/Block/Block.cs | 22 ---- cs/Markdown/Block/BlockProcessor.cs | 14 --- cs/Markdown/LineNode/INodeGenerator.cs | 8 ++ cs/Markdown/LineNode/LineNode.cs | 28 ----- cs/Markdown/LineNode/Node.cs | 2 + cs/Markdown/LineNode/NodeGenerator.cs | 150 ++++++++++++++++++++++++ cs/Markdown/LineNode/NodeType.cs | 6 + cs/Markdown/Markdown.csproj.DotSettings | 2 - cs/Markdown/MarkdownTest/RenderTest.cs | 9 ++ cs/Markdown/Parser.cs | 14 --- cs/Markdown/Rules/EscapeRule.cs | 6 + cs/Markdown/Rules/HeaderRule.cs | 6 + cs/Markdown/Rules/ITokenRule.cs | 7 ++ cs/Markdown/Rules/NewLineRule.cs | 6 + cs/Markdown/Rules/TextRunRule.cs | 6 + cs/Markdown/Rules/UnderscoreRule.cs | 6 + cs/Markdown/Token/ITokenGenerator.cs | 6 + cs/Markdown/Token/InputCursor.cs | 6 + cs/Markdown/Token/TokenFactory.cs | 6 + cs/Markdown/Token/TokenGenerator.cs | 43 +++++++ cs/Markdown/Token/TokenProcessor.cs | 14 --- 21 files changed, 273 insertions(+), 94 deletions(-) delete mode 100644 cs/Markdown/Block/Block.cs delete mode 100644 cs/Markdown/Block/BlockProcessor.cs create mode 100644 cs/Markdown/LineNode/INodeGenerator.cs delete mode 100644 cs/Markdown/LineNode/LineNode.cs create mode 100644 cs/Markdown/LineNode/Node.cs create mode 100644 cs/Markdown/LineNode/NodeGenerator.cs create mode 100644 cs/Markdown/LineNode/NodeType.cs delete mode 100644 cs/Markdown/Markdown.csproj.DotSettings create mode 100644 cs/Markdown/MarkdownTest/RenderTest.cs delete mode 100644 cs/Markdown/Parser.cs create mode 100644 cs/Markdown/Rules/EscapeRule.cs create mode 100644 cs/Markdown/Rules/HeaderRule.cs create mode 100644 cs/Markdown/Rules/ITokenRule.cs create mode 100644 cs/Markdown/Rules/NewLineRule.cs create mode 100644 cs/Markdown/Rules/TextRunRule.cs create mode 100644 cs/Markdown/Rules/UnderscoreRule.cs create mode 100644 cs/Markdown/Token/ITokenGenerator.cs create mode 100644 cs/Markdown/Token/InputCursor.cs create mode 100644 cs/Markdown/Token/TokenFactory.cs create mode 100644 cs/Markdown/Token/TokenGenerator.cs delete mode 100644 cs/Markdown/Token/TokenProcessor.cs diff --git a/cs/Markdown/Block/Block.cs b/cs/Markdown/Block/Block.cs deleted file mode 100644 index a023927c5..000000000 --- a/cs/Markdown/Block/Block.cs +++ /dev/null @@ -1,22 +0,0 @@ -using System.Collections.Generic; - -namespace Markdown; - -public class Block -{ - public string Raw; -} - -public class ParagraphBlock : Block -{ - public ParagraphBlock(string raw) - { - } -} - -public class HeaderBlock : Block -{ - public HeaderBlock(string raw) - { - } -} \ No newline at end of file diff --git a/cs/Markdown/Block/BlockProcessor.cs b/cs/Markdown/Block/BlockProcessor.cs deleted file mode 100644 index 64694c880..000000000 --- a/cs/Markdown/Block/BlockProcessor.cs +++ /dev/null @@ -1,14 +0,0 @@ -namespace Markdown; - -public interface IBlockProcessor -{ - IEnumerable SplitToBlocks(string text); -} - -public class BlockProcessor : IBlockProcessor -{ - public IEnumerable SplitToBlocks(string text) - { - yield break; - } -} \ No newline at end of file diff --git a/cs/Markdown/LineNode/INodeGenerator.cs b/cs/Markdown/LineNode/INodeGenerator.cs new file mode 100644 index 000000000..b0817a70b --- /dev/null +++ b/cs/Markdown/LineNode/INodeGenerator.cs @@ -0,0 +1,8 @@ +using System.Collections.Generic; + +namespace Markdown; + +public interface IParser +{ + IReadOnlyList Parse(IEnumerable tokens); +} \ No newline at end of file diff --git a/cs/Markdown/LineNode/LineNode.cs b/cs/Markdown/LineNode/LineNode.cs deleted file mode 100644 index 3a1112d1c..000000000 --- a/cs/Markdown/LineNode/LineNode.cs +++ /dev/null @@ -1,28 +0,0 @@ -using System.Collections.Generic; - -namespace Markdown; - -public class LineNode -{ -} - -public class TextNode : LineNode -{ - public string Text; -} - -public class EmphasisNode : LineNode -{ - public List Children; -} - -public class StrongNode : LineNode -{ - public List Children; -} - -public class LinkNode : LineNode -{ - public string Href; - public List Label; -} \ No newline at end of file diff --git a/cs/Markdown/LineNode/Node.cs b/cs/Markdown/LineNode/Node.cs new file mode 100644 index 000000000..7637a4635 --- /dev/null +++ b/cs/Markdown/LineNode/Node.cs @@ -0,0 +1,2 @@ +namespace Markdown; + diff --git a/cs/Markdown/LineNode/NodeGenerator.cs b/cs/Markdown/LineNode/NodeGenerator.cs new file mode 100644 index 000000000..9d8643346 --- /dev/null +++ b/cs/Markdown/LineNode/NodeGenerator.cs @@ -0,0 +1,150 @@ +// File: Parser.cs + +using System.Collections.Generic; +using System.Linq; +using System.Text; + +namespace Markdown; + +public class Parser : IParser +{ + public IReadOnlyList Parse(IEnumerable tokens) + { + var list = tokens?.ToList() ?? new List(); + var result = new List(); + var textBuffer = new StringBuilder(); + + // Стек открытых маркеров: тип + индекс начала детей в result + var stack = new Stack<(TokenType Type, int OutIndex, int OpenPos)>(); + + for (int i = 0; i < list.Count; i++) + { + var t = list[i]; + + if (t.Type == TokenType.Text) + { + textBuffer.Append(t.Value); + continue; + } + + FlushTextBuffer(textBuffer, result); + + if (t.Type == TokenType.Underscore) + { + if (IsSingleUnderscoreInsideWord(list, i)) + { + result.Add(new TextNode("_", t.StartPos, t.StartPos + 1)); + continue; + } + + if (TryClose(stack, TokenType.Underscore, result, (open, children) => + new EmphasisNode(children, startPos: open.openPos, endPos: t.StartPos + 1))) + { + continue; + } + + stack.Push((TokenType.Underscore, result.Count, t.StartPos)); + continue; + } + + if (t.Type == TokenType.DoubleUnderscore) + { + if (TryClose(stack, TokenType.DoubleUnderscore, result, (open, children) => + new StrongNode(children, startPos: open.openPos, endPos: t.StartPos + 2))) + { + continue; + } + + stack.Push((TokenType.DoubleUnderscore, result.Count, t.StartPos)); + continue; + } + + // Другие типы при расширении синтаксиса: + // можно добавить HeaderMarker обработку, ссылки и т.д. + } + + FlushTextBuffer(textBuffer, result); + + // Возврат незакрытых маркеров в поток как текст + while (stack.Count > 0) + { + var unclosed = stack.Pop(); + var marker = unclosed.Type == TokenType.Underscore ? "_" : "__"; + result.Insert(unclosed.OutIndex, new TextNode(marker)); + } + + return result; + } + + private static void FlushTextBuffer(StringBuilder sb, List output) + { + if (sb.Length == 0) return; + output.Add(new TextNode(sb.ToString())); + sb.Clear(); + } + + // Закрывает верх стека, если тип совпадает; заворачивает детей в контейнер + // Parser.cs — исправленная версия TryClose и вызовы + + private static bool TryClose( + Stack<(TokenType Type, int OutIndex, int OpenPos)> stack, + TokenType type, + List output, + Func<(int openPos, int outIndex), List, LineNode> makeNode) + { + if (stack.Count == 0) return false; + var open = stack.Peek(); + if (open.Type != type) return false; + + // Снимаем открывающий маркер + stack.Pop(); + + // Забираем детей с позиции открытия + var children = output.Skip(open.OutIndex).ToList(); + output.RemoveRange(open.OutIndex, output.Count - open.OutIndex); + + // Создаём узел, передавая координату открытия + var node = makeNode((open.OpenPos, open.OutIndex), children); + output.Add(node); + return true; + } + + + // Одиночный '_' «внутри слова»: по обе стороны буква/цифра — тогда это текст + private static bool IsSingleUnderscoreInsideWord(IReadOnlyList tokens, int index) + { + bool leftAlnum = false, rightAlnum = false; + + for (int i = index - 1; i >= 0; i--) + { + var t = tokens[i]; + if (t.Type == TokenType.Text && !string.IsNullOrEmpty(t.Value)) + { + char lc = t.Value[t.Value.Length - 1]; + leftAlnum = char.IsLetterOrDigit(lc); + break; + } + else if (t.Type == TokenType.Underscore || t.Type == TokenType.DoubleUnderscore) + { + break; + } + } + + for (int i = index + 1; i < tokens.Count; i++) + { + var t = tokens[i]; + if (t.Type == TokenType.Text && !string.IsNullOrEmpty(t.Value)) + { + char rc = t.Value[0]; + rightAlnum = char.IsLetterOrDigit(rc); + break; + } + else if (t.Type == TokenType.Underscore || t.Type == TokenType.DoubleUnderscore) + { + break; + } + } + + return leftAlnum && rightAlnum; + } +} \ No newline at end of file diff --git a/cs/Markdown/LineNode/NodeType.cs b/cs/Markdown/LineNode/NodeType.cs new file mode 100644 index 000000000..c67509d29 --- /dev/null +++ b/cs/Markdown/LineNode/NodeType.cs @@ -0,0 +1,6 @@ +namespace Markdown; + +public enum NodeType +{ + +} \ No newline at end of file diff --git a/cs/Markdown/Markdown.csproj.DotSettings b/cs/Markdown/Markdown.csproj.DotSettings deleted file mode 100644 index 0f0cdde96..000000000 --- a/cs/Markdown/Markdown.csproj.DotSettings +++ /dev/null @@ -1,2 +0,0 @@ - - True \ No newline at end of file diff --git a/cs/Markdown/MarkdownTest/RenderTest.cs b/cs/Markdown/MarkdownTest/RenderTest.cs new file mode 100644 index 000000000..10ed88339 --- /dev/null +++ b/cs/Markdown/MarkdownTest/RenderTest.cs @@ -0,0 +1,9 @@ +using NUnit.Framework; + +namespace Markdown; + +[TestFixture] +public class RenderTest +{ + +} \ No newline at end of file diff --git a/cs/Markdown/Parser.cs b/cs/Markdown/Parser.cs deleted file mode 100644 index f469c7166..000000000 --- a/cs/Markdown/Parser.cs +++ /dev/null @@ -1,14 +0,0 @@ -namespace Markdown; - -public interface IParser -{ - IReadOnlyList Parse(IEnumerable tokens); -} - -public class Parser : IParser -{ - public IReadOnlyList Parse(IEnumerable tokens) - { - return System.Array.Empty(); - } -} \ No newline at end of file diff --git a/cs/Markdown/Rules/EscapeRule.cs b/cs/Markdown/Rules/EscapeRule.cs new file mode 100644 index 000000000..428ddbe3c --- /dev/null +++ b/cs/Markdown/Rules/EscapeRule.cs @@ -0,0 +1,6 @@ +namespace Markdown; + +public class EscapeRule +{ + +} \ No newline at end of file diff --git a/cs/Markdown/Rules/HeaderRule.cs b/cs/Markdown/Rules/HeaderRule.cs new file mode 100644 index 000000000..49a941137 --- /dev/null +++ b/cs/Markdown/Rules/HeaderRule.cs @@ -0,0 +1,6 @@ +namespace Markdown; + +public class HeaderRule +{ + +} \ No newline at end of file diff --git a/cs/Markdown/Rules/ITokenRule.cs b/cs/Markdown/Rules/ITokenRule.cs new file mode 100644 index 000000000..e5ef8054d --- /dev/null +++ b/cs/Markdown/Rules/ITokenRule.cs @@ -0,0 +1,7 @@ +namespace Markdown; + +public interface ITokenRule +{ + // Возвращает null при неуспехе. При неуспехе не сдвигает курсор. + Token TryRead(InputCursor cursor, bool atLineStart); +} \ No newline at end of file diff --git a/cs/Markdown/Rules/NewLineRule.cs b/cs/Markdown/Rules/NewLineRule.cs new file mode 100644 index 000000000..e7eb4da83 --- /dev/null +++ b/cs/Markdown/Rules/NewLineRule.cs @@ -0,0 +1,6 @@ +namespace Markdown; + +public class NewLineRule +{ + +} \ No newline at end of file diff --git a/cs/Markdown/Rules/TextRunRule.cs b/cs/Markdown/Rules/TextRunRule.cs new file mode 100644 index 000000000..a8a7e6f66 --- /dev/null +++ b/cs/Markdown/Rules/TextRunRule.cs @@ -0,0 +1,6 @@ +namespace Markdown; + +public class TextRunRule +{ + +} \ No newline at end of file diff --git a/cs/Markdown/Rules/UnderscoreRule.cs b/cs/Markdown/Rules/UnderscoreRule.cs new file mode 100644 index 000000000..28b4cd32a --- /dev/null +++ b/cs/Markdown/Rules/UnderscoreRule.cs @@ -0,0 +1,6 @@ +namespace Markdown; + +public class UnderscoreRule +{ + +} \ No newline at end of file diff --git a/cs/Markdown/Token/ITokenGenerator.cs b/cs/Markdown/Token/ITokenGenerator.cs new file mode 100644 index 000000000..b057a0261 --- /dev/null +++ b/cs/Markdown/Token/ITokenGenerator.cs @@ -0,0 +1,6 @@ +namespace Markdown; + +public interface ITokenProcessor +{ + IEnumerable Tokenize(string input); +} \ No newline at end of file diff --git a/cs/Markdown/Token/InputCursor.cs b/cs/Markdown/Token/InputCursor.cs new file mode 100644 index 000000000..80f634912 --- /dev/null +++ b/cs/Markdown/Token/InputCursor.cs @@ -0,0 +1,6 @@ +namespace Markdown; + +public class InputCursor +{ + +} \ No newline at end of file diff --git a/cs/Markdown/Token/TokenFactory.cs b/cs/Markdown/Token/TokenFactory.cs new file mode 100644 index 000000000..6b102e7e9 --- /dev/null +++ b/cs/Markdown/Token/TokenFactory.cs @@ -0,0 +1,6 @@ +namespace Markdown; + +public class TokenFactory +{ + +} \ No newline at end of file diff --git a/cs/Markdown/Token/TokenGenerator.cs b/cs/Markdown/Token/TokenGenerator.cs new file mode 100644 index 000000000..3a44d05b7 --- /dev/null +++ b/cs/Markdown/Token/TokenGenerator.cs @@ -0,0 +1,43 @@ +using System.Collections.Generic; + +namespace Markdown; + +public class TokenProcessor : ITokenProcessor +{ + private readonly List rules = new() + { + // Порядок важен: сначала более специфичные + new NewLineRule(), + new EscapeRule(), + new HeaderRule(), + new UnderscoreRule(), + new TextRunRule() + }; + + public IEnumerable Tokenize(string input) + { + if (string.IsNullOrEmpty(input)) + yield break; + //todo: подумать про строку из пробелов - IsNullOfWhitespace + + var cursor = new InputCursor(input); + while (!cursor.End) + { + var isLineStart = true; + yield return TryCreateToken(cursor); + } + } + + private Token? TryCreateToken(InputCursor cursor) + { + Token? token = null; + foreach (var rule in rules) + { + token = rule.TryReadTokenAndMoveCursor(cursor); + if (token != null) + return token; + } + + return token; + } +} \ No newline at end of file diff --git a/cs/Markdown/Token/TokenProcessor.cs b/cs/Markdown/Token/TokenProcessor.cs deleted file mode 100644 index 278c4df9b..000000000 --- a/cs/Markdown/Token/TokenProcessor.cs +++ /dev/null @@ -1,14 +0,0 @@ -namespace Markdown; - -public interface ITokenProcessor -{ - IEnumerable Tokenize(string input); -} - -public class TokenProcessor : ITokenProcessor -{ - public IEnumerable Tokenize(string input) - { - yield break; - } -} \ No newline at end of file From adcaf401e8946f3ea7d490b12fee5ea51160c739 Mon Sep 17 00:00:00 2001 From: MihailK Date: Wed, 12 Nov 2025 13:32:51 +0500 Subject: [PATCH 03/13] =?UTF-8?q?=D0=A1=D0=B4=D0=B5=D0=BB=D0=B0=D0=BB=20?= =?UTF-8?q?=D1=80=D0=B0=D0=B7=D0=B1=D0=B8=D0=B5=D0=BD=D0=B5=D0=B8=D0=B5=20?= =?UTF-8?q?=D0=BD=D0=B0=20=D1=82=D0=BE=D0=BA=D0=B5=D0=BD=D1=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/Rules/EscapeRule.cs | 19 ++++++++++-- cs/Markdown/Rules/HeaderRule.cs | 16 ++++++++-- cs/Markdown/Rules/ITokenRule.cs | 3 +- cs/Markdown/Rules/NewLineRule.cs | 20 ++++++++++-- cs/Markdown/Rules/TextRunRule.cs | 26 ++++++++++++++-- cs/Markdown/Rules/UnderscoreRule.cs | 26 ++++++++++++++-- cs/Markdown/Token/ITokenGenerator.cs | 2 +- cs/Markdown/Token/InputCursor.cs | 46 ++++++++++++++++++++++++++-- cs/Markdown/Token/Token.cs | 5 +-- cs/Markdown/Token/TokenFactory.cs | 5 +-- cs/Markdown/Token/TokenGenerator.cs | 3 +- cs/Markdown/Token/TokenType.cs | 3 ++ 12 files changed, 153 insertions(+), 21 deletions(-) diff --git a/cs/Markdown/Rules/EscapeRule.cs b/cs/Markdown/Rules/EscapeRule.cs index 428ddbe3c..fc5c2a1b0 100644 --- a/cs/Markdown/Rules/EscapeRule.cs +++ b/cs/Markdown/Rules/EscapeRule.cs @@ -1,6 +1,21 @@ namespace Markdown; -public class EscapeRule +public sealed class EscapeRule : ITokenRule { - + public Token? TryReadTokenAndMoveCursor(InputCursor cursor) + { + if (cursor.End) return null; + if (cursor.Current != '\\') return null; + + var pos = cursor.Position; + cursor.Move(1); + if (!cursor.End) + { + var s = cursor.Current.ToString(); + cursor.Move(1); + return TokenFactory.Create(TokenType.Text, s, pos); + } + + return TokenFactory.Create(TokenType.Text, "\\", pos); + } } \ No newline at end of file diff --git a/cs/Markdown/Rules/HeaderRule.cs b/cs/Markdown/Rules/HeaderRule.cs index 49a941137..2da52b900 100644 --- a/cs/Markdown/Rules/HeaderRule.cs +++ b/cs/Markdown/Rules/HeaderRule.cs @@ -1,6 +1,18 @@ namespace Markdown; -public class HeaderRule +public sealed class HeaderRule : ITokenRule { - + public Token? TryReadTokenAndMoveCursor(InputCursor cursor) + { + if (!cursor.IsNewLine() || cursor.End) return null; + + if (cursor.Current == '#' && cursor.Peek() == ' ') + { + var currentPos = cursor.Position; + cursor.Move(2); + return TokenFactory.Create(TokenType.HeaderMarker, "# ", currentPos); + } + + return null; + } } \ No newline at end of file diff --git a/cs/Markdown/Rules/ITokenRule.cs b/cs/Markdown/Rules/ITokenRule.cs index e5ef8054d..84690413e 100644 --- a/cs/Markdown/Rules/ITokenRule.cs +++ b/cs/Markdown/Rules/ITokenRule.cs @@ -2,6 +2,5 @@ public interface ITokenRule { - // Возвращает null при неуспехе. При неуспехе не сдвигает курсор. - Token TryRead(InputCursor cursor, bool atLineStart); + Token? TryReadTokenAndMoveCursor(InputCursor cursor); } \ No newline at end of file diff --git a/cs/Markdown/Rules/NewLineRule.cs b/cs/Markdown/Rules/NewLineRule.cs index e7eb4da83..8fff82a31 100644 --- a/cs/Markdown/Rules/NewLineRule.cs +++ b/cs/Markdown/Rules/NewLineRule.cs @@ -1,6 +1,22 @@ namespace Markdown; -public class NewLineRule +public sealed class NewLineRule : ITokenRule { - + public Token TryReadTokenAndMoveCursor(InputCursor cursor) + { + if (cursor.End) return null; + if (cursor.Current == '\n') + { + var pos = cursor.Position; + cursor.Move(1); + return TokenFactory.Create(TokenType.NewLine, "\n", pos); + } + + if (cursor.Current == '\r') + { + cursor.Move(1); + } + + return null; + } } \ No newline at end of file diff --git a/cs/Markdown/Rules/TextRunRule.cs b/cs/Markdown/Rules/TextRunRule.cs index a8a7e6f66..eb0ee3026 100644 --- a/cs/Markdown/Rules/TextRunRule.cs +++ b/cs/Markdown/Rules/TextRunRule.cs @@ -1,6 +1,28 @@ namespace Markdown; -public class TextRunRule +public sealed class TextRunRule : ITokenRule { - + public Token TryReadTokenAndMoveCursor(InputCursor cursor) + { + if (cursor.End) return null; + + if (cursor.Current == '_' || + cursor.Current == '\n' || + cursor.Current == '\r' || + cursor.Current == '\\') + return null; + + var start = cursor.Position; + while (!cursor.End) + { + var c = cursor.Current; + if (c == '_' || c == '\n' || c == '\r' || c == '\\') break; + cursor.Move(1); + } + + if (cursor.Position > start) + return TokenFactory.Create(TokenType.Text, cursor.Slice(start, cursor.Position), start); + + return null; + } } \ No newline at end of file diff --git a/cs/Markdown/Rules/UnderscoreRule.cs b/cs/Markdown/Rules/UnderscoreRule.cs index 28b4cd32a..97989aedc 100644 --- a/cs/Markdown/Rules/UnderscoreRule.cs +++ b/cs/Markdown/Rules/UnderscoreRule.cs @@ -1,6 +1,28 @@ namespace Markdown; -public class UnderscoreRule +public sealed class UnderscoreRule : ITokenRule { - + public Token TryReadTokenAndMoveCursor(InputCursor cursor) + { + if (cursor.End) return null; + if (cursor.Current != '_') return null; + + var pos = cursor.Position; + + var count = 0; + while (!cursor.End && cursor.Current == '_') + { + count++; + cursor.Move(1); + } + + if (count >= 2) + { + cursor.Revert(pos + 2); + cursor.Move(2); + return TokenFactory.Create(TokenType.DoubleUnderscore, "__", pos); + } + + return TokenFactory.Create(TokenType.Underscore, "_", pos); + } } \ No newline at end of file diff --git a/cs/Markdown/Token/ITokenGenerator.cs b/cs/Markdown/Token/ITokenGenerator.cs index b057a0261..75d1aed00 100644 --- a/cs/Markdown/Token/ITokenGenerator.cs +++ b/cs/Markdown/Token/ITokenGenerator.cs @@ -1,6 +1,6 @@ namespace Markdown; -public interface ITokenProcessor +public interface ITokenGenerator { IEnumerable Tokenize(string input); } \ No newline at end of file diff --git a/cs/Markdown/Token/InputCursor.cs b/cs/Markdown/Token/InputCursor.cs index 80f634912..1c02ec1b8 100644 --- a/cs/Markdown/Token/InputCursor.cs +++ b/cs/Markdown/Token/InputCursor.cs @@ -1,6 +1,48 @@ -namespace Markdown; +using System; + +namespace Markdown; public class InputCursor { - + private readonly string input; + public int Position { get; private set; } + public int Length => input.Length; + public bool End => Position >= input.Length; + + public InputCursor(string input) + { + this.input = input ?? string.Empty; + Position = 0; + } + + public char Current => End ? '\0' : input[Position]; + + public bool IsNewLine() + { + return Position == 0 || input[Position - 1] == '\n'; + } + + public char Peek(int offset = 1) + { + var idx = Position + offset; + return idx >= 0 && idx < input.Length ? input[idx] : '\0'; + } + + public bool StartsWith(string s) + { + if (s is null) return false; + if (Position + s.Length > input.Length) return false; + for (int i = 0; i < s.Length; i++) + if (input[Position + i] != s[i]) + return false; + return true; + } + + public void Move(int count = 1) => + Position = Math.Min(Position + count, input.Length); //todo: подумать, может лучше trymove + + public void Revert(int mark) => Position = mark; + + public string Slice(int start, int end) + => input.Substring(start, Math.Max(0, end - start)); } \ No newline at end of file diff --git a/cs/Markdown/Token/Token.cs b/cs/Markdown/Token/Token.cs index 5ead1d3fa..c99aaea15 100644 --- a/cs/Markdown/Token/Token.cs +++ b/cs/Markdown/Token/Token.cs @@ -5,6 +5,7 @@ namespace Markdown; public class Token { public TokenType Type; - public string Info; - public int Pos; + public required string Value { get; init; } + public int StartPos { get; init; } + public int EndPos => StartPos + Value.Length; } \ No newline at end of file diff --git a/cs/Markdown/Token/TokenFactory.cs b/cs/Markdown/Token/TokenFactory.cs index 6b102e7e9..e35e3ec20 100644 --- a/cs/Markdown/Token/TokenFactory.cs +++ b/cs/Markdown/Token/TokenFactory.cs @@ -1,6 +1,7 @@ namespace Markdown; -public class TokenFactory +public static class TokenFactory { - + public static Token Create(TokenType type, string info, int pos) + => new Token { Type = type, Value = info, StartPos = pos }; } \ No newline at end of file diff --git a/cs/Markdown/Token/TokenGenerator.cs b/cs/Markdown/Token/TokenGenerator.cs index 3a44d05b7..6fc34264e 100644 --- a/cs/Markdown/Token/TokenGenerator.cs +++ b/cs/Markdown/Token/TokenGenerator.cs @@ -2,11 +2,10 @@ namespace Markdown; -public class TokenProcessor : ITokenProcessor +public class TokenGenerator : ITokenGenerator { private readonly List rules = new() { - // Порядок важен: сначала более специфичные new NewLineRule(), new EscapeRule(), new HeaderRule(), diff --git a/cs/Markdown/Token/TokenType.cs b/cs/Markdown/Token/TokenType.cs index 74489db4a..fd1477f90 100644 --- a/cs/Markdown/Token/TokenType.cs +++ b/cs/Markdown/Token/TokenType.cs @@ -5,4 +5,7 @@ public enum TokenType Text, Underscore, DoubleUnderscore, + HeaderMarker, + NewLine, + Escape } \ No newline at end of file From 2d9b125a3c97a600528941ea5dabf75af63bbf98 Mon Sep 17 00:00:00 2001 From: MihailK Date: Wed, 12 Nov 2025 13:35:00 +0500 Subject: [PATCH 04/13] =?UTF-8?q?=D0=A1=D0=B4=D0=B5=D0=BB=D0=B0=D0=BB=20?= =?UTF-8?q?=D0=BF=D1=80=D0=B8=D0=BC=D0=B5=D1=80=D0=BD=D1=83=D1=8E=20=D0=B0?= =?UTF-8?q?=D1=80=D1=85=D0=B8=D1=82=D0=B5=D0=BA=D1=83=D1=82=D1=80=D1=83=20?= =?UTF-8?q?=D0=BD=D0=BE=D0=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/LineNode/INodeGenerator.cs | 4 +- cs/Markdown/LineNode/Node.cs | 7 ++ cs/Markdown/LineNode/NodeGenerator.cs | 151 +------------------------ cs/Markdown/LineNode/NodeType.cs | 6 +- cs/Markdown/Markdown.csproj | 6 + cs/Markdown/MarkdownTest/RenderTest.cs | 116 ++++++++++++++++++- cs/Markdown/Md.cs | 41 +++---- cs/Markdown/Program.cs | 2 +- cs/Markdown/Rendering.cs | 10 +- 9 files changed, 160 insertions(+), 183 deletions(-) diff --git a/cs/Markdown/LineNode/INodeGenerator.cs b/cs/Markdown/LineNode/INodeGenerator.cs index b0817a70b..e50ed0dfe 100644 --- a/cs/Markdown/LineNode/INodeGenerator.cs +++ b/cs/Markdown/LineNode/INodeGenerator.cs @@ -2,7 +2,7 @@ namespace Markdown; -public interface IParser +public interface INodeGenerator { - IReadOnlyList Parse(IEnumerable tokens); + IEnumerable Create(IEnumerable tokens); } \ No newline at end of file diff --git a/cs/Markdown/LineNode/Node.cs b/cs/Markdown/LineNode/Node.cs index 7637a4635..4c4dcaecd 100644 --- a/cs/Markdown/LineNode/Node.cs +++ b/cs/Markdown/LineNode/Node.cs @@ -1,2 +1,9 @@ namespace Markdown; + +public class Node +{ + public NodeType Type; + public string Value; + public List Children; +} \ No newline at end of file diff --git a/cs/Markdown/LineNode/NodeGenerator.cs b/cs/Markdown/LineNode/NodeGenerator.cs index 9d8643346..04801dca2 100644 --- a/cs/Markdown/LineNode/NodeGenerator.cs +++ b/cs/Markdown/LineNode/NodeGenerator.cs @@ -1,150 +1,9 @@ -// File: Parser.cs +namespace Markdown; -using System.Collections.Generic; -using System.Linq; -using System.Text; - -namespace Markdown; - -public class Parser : IParser +public class NodeGenerator : INodeGenerator { - public IReadOnlyList Parse(IEnumerable tokens) - { - var list = tokens?.ToList() ?? new List(); - var result = new List(); - var textBuffer = new StringBuilder(); - - // Стек открытых маркеров: тип + индекс начала детей в result - var stack = new Stack<(TokenType Type, int OutIndex, int OpenPos)>(); - - for (int i = 0; i < list.Count; i++) - { - var t = list[i]; - - if (t.Type == TokenType.Text) - { - textBuffer.Append(t.Value); - continue; - } - - FlushTextBuffer(textBuffer, result); - - if (t.Type == TokenType.Underscore) - { - if (IsSingleUnderscoreInsideWord(list, i)) - { - result.Add(new TextNode("_", t.StartPos, t.StartPos + 1)); - continue; - } - - if (TryClose(stack, TokenType.Underscore, result, (open, children) => - new EmphasisNode(children, startPos: open.openPos, endPos: t.StartPos + 1))) - { - continue; - } - - stack.Push((TokenType.Underscore, result.Count, t.StartPos)); - continue; - } - - if (t.Type == TokenType.DoubleUnderscore) - { - if (TryClose(stack, TokenType.DoubleUnderscore, result, (open, children) => - new StrongNode(children, startPos: open.openPos, endPos: t.StartPos + 2))) - { - continue; - } - - stack.Push((TokenType.DoubleUnderscore, result.Count, t.StartPos)); - continue; - } - - // Другие типы при расширении синтаксиса: - // можно добавить HeaderMarker обработку, ссылки и т.д. - } - - FlushTextBuffer(textBuffer, result); - - // Возврат незакрытых маркеров в поток как текст - while (stack.Count > 0) - { - var unclosed = stack.Pop(); - var marker = unclosed.Type == TokenType.Underscore ? "_" : "__"; - result.Insert(unclosed.OutIndex, new TextNode(marker)); - } - - return result; - } - - private static void FlushTextBuffer(StringBuilder sb, List output) - { - if (sb.Length == 0) return; - output.Add(new TextNode(sb.ToString())); - sb.Clear(); - } - - // Закрывает верх стека, если тип совпадает; заворачивает детей в контейнер - // Parser.cs — исправленная версия TryClose и вызовы - - private static bool TryClose( - Stack<(TokenType Type, int OutIndex, int OpenPos)> stack, - TokenType type, - List output, - Func<(int openPos, int outIndex), List, LineNode> makeNode) - { - if (stack.Count == 0) return false; - var open = stack.Peek(); - if (open.Type != type) return false; - - // Снимаем открывающий маркер - stack.Pop(); - - // Забираем детей с позиции открытия - var children = output.Skip(open.OutIndex).ToList(); - output.RemoveRange(open.OutIndex, output.Count - open.OutIndex); - - // Создаём узел, передавая координату открытия - var node = makeNode((open.OpenPos, open.OutIndex), children); - output.Add(node); - return true; - } - - - // Одиночный '_' «внутри слова»: по обе стороны буква/цифра — тогда это текст - private static bool IsSingleUnderscoreInsideWord(IReadOnlyList tokens, int index) + public IEnumerable Create(IEnumerable tokens) { - bool leftAlnum = false, rightAlnum = false; - - for (int i = index - 1; i >= 0; i--) - { - var t = tokens[i]; - if (t.Type == TokenType.Text && !string.IsNullOrEmpty(t.Value)) - { - char lc = t.Value[t.Value.Length - 1]; - leftAlnum = char.IsLetterOrDigit(lc); - break; - } - else if (t.Type == TokenType.Underscore || t.Type == TokenType.DoubleUnderscore) - { - break; - } - } - - for (int i = index + 1; i < tokens.Count; i++) - { - var t = tokens[i]; - if (t.Type == TokenType.Text && !string.IsNullOrEmpty(t.Value)) - { - char rc = t.Value[0]; - rightAlnum = char.IsLetterOrDigit(rc); - break; - } - else if (t.Type == TokenType.Underscore || t.Type == TokenType.DoubleUnderscore) - { - break; - } - } - - return leftAlnum && rightAlnum; + throw new NotImplementedException(); } -} \ No newline at end of file +} diff --git a/cs/Markdown/LineNode/NodeType.cs b/cs/Markdown/LineNode/NodeType.cs index c67509d29..a34f14a06 100644 --- a/cs/Markdown/LineNode/NodeType.cs +++ b/cs/Markdown/LineNode/NodeType.cs @@ -2,5 +2,9 @@ public enum NodeType { - + Plain, // обычная строка + Header, // заголовок + Emphasis, // целиком строка в _..._ + Strong, // целиком строка в __...__ + Link // ссылка } \ No newline at end of file diff --git a/cs/Markdown/Markdown.csproj b/cs/Markdown/Markdown.csproj index 85b49591f..b7a62d102 100644 --- a/cs/Markdown/Markdown.csproj +++ b/cs/Markdown/Markdown.csproj @@ -7,4 +7,10 @@ enable + + + + + + diff --git a/cs/Markdown/MarkdownTest/RenderTest.cs b/cs/Markdown/MarkdownTest/RenderTest.cs index 10ed88339..1b4bedcd5 100644 --- a/cs/Markdown/MarkdownTest/RenderTest.cs +++ b/cs/Markdown/MarkdownTest/RenderTest.cs @@ -1,9 +1,119 @@ -using NUnit.Framework; +using FluentAssertions; +using Markdown; +using NUnit.Framework; -namespace Markdown; +namespace MarkdownTest; -[TestFixture] public class RenderTest { + private readonly Md md = new Md(); + + [Test] + public void Проверка_экранирования() + { + var text = "\\_Вот это\\_"; + var html = md.Render(text); + + html.Should().Be("_Вот это_"); + } + + [Test] + public void Курсив_внутри_жирного() + { + var text = "__a _b_ c__"; + var html = md.Render(text); + + html.Should().Be("a b c"); + } + + [Test] + public void Жирный_внутри_курсива() + { + var text = "_a __b__ c_"; + var html = md.Render(text); + + html.Should().Be("a __b__ c"); + } + [Test] + public void Неправильное_прилипание() + { + var text = "_ a_ bbb _a _"; + var html = md.Render(text); + + html.Should().Be("_ a_ bbb _a _"); + } + + [Test] + public void Renders_Strong_With_Double_Underscore() + { + var text = "Это __жирный__ текст"; + var html = md.Render(text); + + html.Should().Be("Это жирный текст"); + } + + [Test] + public void Renders_Emphasis_With_Single_Underscore_When_Not_Inside_Word() + { + var text = "Текст, _окруженный с двух сторон_ одинарными символами подчерка"; + var html = md.Render(text); + + html.Should().Be("Текст, окруженный с двух сторон одинарными символами подчерка"); + } + + [Test] + public void Does_Not_Emphasize_Single_Underscore_Inside_Word() + { + var text = "вну_три"; + var html = md.Render(text); + + html.Should().Be("вну_три"); + } + + [Test] + public void Renders_Header_H1_For_Line_Starting_With_Hash_Space() + { + var text = "# Заголовок"; + var html = md.Render(text); + + html.Should().Be("

Заголовок

"); + } + + [Test] + public void Keeps_Newline_After_Header_And_Renders_Inline_Text_On_Next_Line() + { + var text = "# Заголовок\n_курсив_ и __жирный__ и вну_три"; + var html = md.Render(text); + + html.Should().Be("

Заголовок

\nкурсив и жирный и вну_три"); + } + + [Test] + public void Supports_Multiple_Headers_In_One_Text() + { + var text = "# Заголовок1\n_курсив_ и __жирный__ и вну_три\n# Заголовок2\nкапибара"; + var html = md.Render(text); + + html.Should() + .Be( + "

Заголовок1

\nкурсив и жирный и вну_три\n

Заголовок2

\nкапибара"); + } + + public static IEnumerable StrongCases() + { + yield return new object[] { "__a__", "a" }; + yield return new object[] { "____", "" }; + yield return + new object[] { "___a__", "_a" }; // одинарное останется как текст + } + + + public static IEnumerable EmphasisCases() + { + yield return new object[] { "_a_", "a" }; + yield return new object[] { "a_b", "a_b" }; // внутри слова не срабатывает + yield return + new object[] { "_ a_", "_ a_" }; // открывающее слитно с пробелом — остаётся как текст + } } \ No newline at end of file diff --git a/cs/Markdown/Md.cs b/cs/Markdown/Md.cs index 051336ead..b02181d30 100644 --- a/cs/Markdown/Md.cs +++ b/cs/Markdown/Md.cs @@ -1,4 +1,6 @@ -namespace Markdown; +using System.Text; + +namespace Markdown; using System; using System.Collections.Generic; @@ -6,38 +8,25 @@ public class Md { - private readonly BlockProcessor blockProcessor; - private readonly TokenProcessor tokenProcessor; - private readonly Parser parser; + private readonly TokenGenerator tokenGenerator; + private readonly NodeGenerator nodeGenerator; private readonly HtmlRenderer htmlRenderer; - public Md() - : this(new BlockProcessor(), new TokenProcessor(), new Parser(), new HtmlRenderer()) - { - } - - public Md(BlockProcessor blockProcessor, TokenProcessor tokenProcessor, Parser parser, - HtmlRenderer htmlRenderer) { - this.blockProcessor = blockProcessor; - this.tokenProcessor = tokenProcessor; - this.parser = parser; - this.htmlRenderer = htmlRenderer; + tokenGenerator = new TokenGenerator(); + nodeGenerator = new NodeGenerator(); + htmlRenderer = new HtmlRenderer(); } public string Render(string markdownText) { - var blocks = blockProcessor.SplitToBlocks(markdownText); - var renderedBlocks = new List(); - foreach (var block in blocks) - { - var raw = block.Raw; - var tokens = tokenProcessor.Tokenize(raw); - var lineNodes = parser.Parse(tokens); - var html = htmlRenderer.Render(lineNodes); - renderedBlocks.Add(html); - } + if (string.IsNullOrEmpty(markdownText)) + return string.Empty; + + var tokens = tokenGenerator.Tokenize(markdownText); + var nods = nodeGenerator.Create(tokens); + var html = htmlRenderer.Render(nods); - return string.Concat(renderedBlocks); + return html; } } \ No newline at end of file diff --git a/cs/Markdown/Program.cs b/cs/Markdown/Program.cs index 837131c21..5f282702b 100644 --- a/cs/Markdown/Program.cs +++ b/cs/Markdown/Program.cs @@ -1 +1 @@ -Console.WriteLine("Hello, World!"); \ No newline at end of file + \ No newline at end of file diff --git a/cs/Markdown/Rendering.cs b/cs/Markdown/Rendering.cs index 2ecd3a490..6e41574b9 100644 --- a/cs/Markdown/Rendering.cs +++ b/cs/Markdown/Rendering.cs @@ -1,14 +1,16 @@ -namespace Markdown; +using System.Text; + +namespace Markdown; public interface IRenderer { - string Render(IEnumerable lineNodes); + string Render(IEnumerable lineNodes); } public class HtmlRenderer : IRenderer { - public string Render(IEnumerable lineNodes) + public string Render(IEnumerable lines) { - return string.Empty; + throw new NotImplementedException(); } } \ No newline at end of file From 936a2521b9d9ec12fc70c10c120ad4f64a25804c Mon Sep 17 00:00:00 2001 From: MihailK Date: Mon, 17 Nov 2025 17:19:09 +0500 Subject: [PATCH 05/13] =?UTF-8?q?=D0=94=D0=BE=D0=B1=D0=B0=D0=B2=D0=B8?= =?UTF-8?q?=D0=BB=20=D0=BD=D0=BE=D0=B2=D1=8B=D0=B5=20=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=B8=D0=BB=D0=B0=20=D0=B4=D0=BB=D1=8F=20=D1=82=D0=BE?= =?UTF-8?q?=D0=BA=D0=B5=D0=BD=D0=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/LineNode/NodeGenerator.cs | 9 - cs/Markdown/Node/HandelProcessor.cs | 257 ++++++++++++++++++ .../{LineNode => Node}/INodeGenerator.cs | 2 +- cs/Markdown/{LineNode => Node}/Node.cs | 0 cs/Markdown/Node/NodeFactory.cs | 6 + cs/Markdown/Node/NodeGenerator.cs | 86 ++++++ cs/Markdown/{LineNode => Node}/NodeType.cs | 0 cs/Markdown/Node/TokenCursor.cs | 6 + .../Token/{InputCursor.cs => TextCursor.cs} | 10 - cs/Markdown/TokenCreateRules/DigitRule.cs | 23 ++ .../{Rules => TokenCreateRules}/EscapeRule.cs | 0 .../{Rules => TokenCreateRules}/HeaderRule.cs | 0 .../{Rules => TokenCreateRules}/ITokenRule.cs | 0 .../NewLineRule.cs | 0 .../TextRunRule.cs | 17 +- .../UnderscoreRule.cs | 2 +- .../TokenCreateRules/WhiteSpaceRule.cs | 25 ++ 17 files changed, 417 insertions(+), 26 deletions(-) delete mode 100644 cs/Markdown/LineNode/NodeGenerator.cs create mode 100644 cs/Markdown/Node/HandelProcessor.cs rename cs/Markdown/{LineNode => Node}/INodeGenerator.cs (62%) rename cs/Markdown/{LineNode => Node}/Node.cs (100%) create mode 100644 cs/Markdown/Node/NodeFactory.cs create mode 100644 cs/Markdown/Node/NodeGenerator.cs rename cs/Markdown/{LineNode => Node}/NodeType.cs (100%) create mode 100644 cs/Markdown/Node/TokenCursor.cs rename cs/Markdown/Token/{InputCursor.cs => TextCursor.cs} (77%) create mode 100644 cs/Markdown/TokenCreateRules/DigitRule.cs rename cs/Markdown/{Rules => TokenCreateRules}/EscapeRule.cs (100%) rename cs/Markdown/{Rules => TokenCreateRules}/HeaderRule.cs (100%) rename cs/Markdown/{Rules => TokenCreateRules}/ITokenRule.cs (100%) rename cs/Markdown/{Rules => TokenCreateRules}/NewLineRule.cs (100%) rename cs/Markdown/{Rules => TokenCreateRules}/TextRunRule.cs (59%) rename cs/Markdown/{Rules => TokenCreateRules}/UnderscoreRule.cs (95%) create mode 100644 cs/Markdown/TokenCreateRules/WhiteSpaceRule.cs diff --git a/cs/Markdown/LineNode/NodeGenerator.cs b/cs/Markdown/LineNode/NodeGenerator.cs deleted file mode 100644 index 04801dca2..000000000 --- a/cs/Markdown/LineNode/NodeGenerator.cs +++ /dev/null @@ -1,9 +0,0 @@ -namespace Markdown; - -public class NodeGenerator : INodeGenerator -{ - public IEnumerable Create(IEnumerable tokens) - { - throw new NotImplementedException(); - } -} diff --git a/cs/Markdown/Node/HandelProcessor.cs b/cs/Markdown/Node/HandelProcessor.cs new file mode 100644 index 000000000..003e6b2a6 --- /dev/null +++ b/cs/Markdown/Node/HandelProcessor.cs @@ -0,0 +1,257 @@ +namespace Markdown; + +public static class Handel +{ + public static void Escape(List children, TokenCursor tokenCursor) + { + tokenCursor.Move(); + switch (tokenCursor.Current!.Type) + { + case TokenType.NewLine + or TokenType.EndOfText: + children.Add(NodeFactory.Create(NodeType.Text, tokenCursor.Current.Value, null)); + return; + case TokenType.Text: + children.Add(NodeFactory.Create(NodeType.Text, @"\" + tokenCursor.Current.Value, null)); + break; + case TokenType.Escape: + default: + children.Add(NodeFactory.Create(NodeType.Text, tokenCursor.Current.Value, null)); + break; + } + + tokenCursor.Move(); + } + + public static void Underscores( + List children, + List<(TokenType Type, int ChildrenIndex)> underscores, + TokenCursor tokenCursor) + { + var currentToken = tokenCursor.Current; + var previousToken = tokenCursor.tokens.ElementAtOrDefault(tokenCursor.Position - 1); + var nextToken = tokenCursor.tokens.ElementAtOrDefault(tokenCursor.Position + 1); + + if (IsUnderscoreBetweenDigits(previousToken, nextToken)) // между числами + { + AddUnderscoreAsLiteral(children, currentToken.Value, tokenCursor); + return; + } + + if (!IsTokenWhitespaceLike(previousToken)) // слева пробел или граница + { + if (TryCloseExistingHighlight(underscores, currentToken, children, tokenCursor)) + return; + } + + + if (IsTokenWhitespaceLike(nextToken) || + (currentToken.Type == TokenType.DoubleUnderscore && DoesDoubleUnderscoreBreak(tokenCursor))) + { + AddUnderscoreAsLiteral(children, currentToken.Value, tokenCursor); + } + else + { + underscores.Add((currentToken.Type, children.Count)); + tokenCursor.Move(); + } + } + + public static void InsertUnmatchedUnderscores(List children, + List<(TokenType Type, int ChildrenIndex)> underscores) + { + for (var i = underscores.Count - 1; i >= 0; i--) + { + var (type, index) = underscores[i]; + var literal = type == TokenType.DoubleUnderscore ? "__" : "_"; + children.Insert(index, NodeFactory.Create(NodeType.Text, literal, null)); + } + } + + private static bool TryCloseExistingHighlight( + List<(TokenType Type, int ChildrenIndex)> underscores, + Token currentToken, + List children, + TokenCursor cursor) + { + var highlightingType = currentToken.Type; + var openerIndex = FindMatchingOpenerIndex(underscores, highlightingType); + if (openerIndex < 0) + return false; + + var openerUnderscore = underscores[openerIndex]; + var startIndex = openerUnderscore.ChildrenIndex; + var innerTokensCount = children.Count - startIndex; + + if (innerTokensCount == 0) + return false; + + var innerTokens = children.GetRange(startIndex, innerTokensCount); + + if (!IsValidHighlighting(underscores, highlightingType, openerUnderscore, innerTokens)) + return false; + + if (HasIntersection(underscores, openerUnderscore, highlightingType, children, openerIndex, out var innerIndex)) + { + InsertIntersection(children, underscores, openerUnderscore, underscores[innerIndex], openerIndex, + innerIndex); + AddUnderscoreAsLiteral(children, currentToken.Value, cursor); + return true; + } + + CloseHighlight(children, underscores, openerIndex, highlightingType, innerTokens, startIndex); + cursor.Move(); + return true; + } + + private static int FindMatchingOpenerIndex(List<(TokenType Type, int ChildrenIndex)> underscores, TokenType type) + { + for (var i = underscores.Count - 1; i >= 0; i--) + { + if (underscores[i].Type == type) + return i; + } + + return -1; + } + + private static bool IsValidHighlighting( + List<(TokenType Type, int ChildrenIndex)> underscores, + TokenType highlightingType, + (TokenType Type, int ChildrenIndex) openerUnderscore, + List innerTokens) + { + if (highlightingType != TokenType.Underscore) + return true; + //точно "_" + var highlightingIsInsideDoubleUnderscore = underscores.Any(underscore => + underscore.Type == TokenType.DoubleUnderscore && + underscore.ChildrenIndex < openerUnderscore.ChildrenIndex); // "_" внутри двойного + + var areThereAnyWhitespaces = HasWhiteSpaceInPlainNodes(innerTokens); // Есть пробелы + + return highlightingIsInsideDoubleUnderscore + || !areThereAnyWhitespaces; + } + + private static bool HasWhiteSpaceInPlainNodes(List innerTokens) + { + if (innerTokens == null || innerTokens.Count == 0) + return false; + + foreach (var t in innerTokens) + { + if (t.Type == NodeType.Text && t.Value != null) + { + var s = t.Value; + for (int j = 0; j < s.Length; j++) + { + if (char.IsWhiteSpace(s[j])) + return true; + } + } + } + + return false; + } + + private static bool HasIntersection( + List<(TokenType Type, int ChildrenIndex)> underscores, + (TokenType Type, int ChildrenIndex) opener, + TokenType highlightingType, + List children, + int openerIndex, + out int intersectionIndex) + { + for (var i = openerIndex + 1; i < underscores.Count; i++) + { + if (underscores[i].Type == highlightingType || + underscores[i].ChildrenIndex <= opener.ChildrenIndex || + underscores[i].ChildrenIndex >= children.Count) + continue; + + intersectionIndex = i; + return true; + } + + intersectionIndex = -1; + return false; + } + + private static void CloseHighlight( + List children, + List<(TokenType Type, int ChildrenIndex)> underscores, + int openerIndex, + TokenType highlightingType, + List innerTokens, + int startIndex) + { + children.RemoveRange(startIndex, innerTokens.Count); + + var node = highlightingType == TokenType.DoubleUnderscore + ? NodeFactory.Create(NodeType.Strong, null, innerTokens) + : NodeFactory.Create(NodeType.Emphasis, null, innerTokens); + + children.Add(node); + underscores.RemoveAt(openerIndex); + } + + private static void InsertIntersection( + List children, + List<(TokenType Type, int ChildrenIndex)> underscores, + (TokenType Type, int ChildrenIndex) opener, + (TokenType Type, int ChildrenIndex) inner, + int openerIndex, int innerIndex) + { + var innerLiteral = inner.Type == TokenType.DoubleUnderscore ? "__" : "_"; + var openerLiteral = opener.Type == TokenType.DoubleUnderscore ? "__" : "_"; + + children.Insert(inner.ChildrenIndex, NodeFactory.Create(NodeType.Text, innerLiteral, null)); + children.Insert(inner.ChildrenIndex, NodeFactory.Create(NodeType.Text, openerLiteral, null)); + + underscores.RemoveAt(innerIndex); + underscores.RemoveAt(openerIndex); + } + + private static void AddUnderscoreAsLiteral(List children, string value, TokenCursor cursor) + { + children.Add(NodeFactory.Create(NodeType.Text, value, null)); + cursor.Move(); + } + + private static bool IsTokenWhitespaceLike(Token? token) + { + return token == null || + token.Type == TokenType.WhiteSpace || + token.Type == TokenType.NewLine || + token.Type == TokenType.EndOfText; + } + + private static bool IsUnderscoreBetweenDigits(Token? previousToken, Token? nextToken) + { + var leftTokenIsDigit = previousToken is { Type: TokenType.Text, Value.Length: > 0 } && + char.IsDigit(previousToken.Value.Last()); + var rightTokenIsDigit = nextToken is { Type: TokenType.Text, Value.Length: > 0 } && + char.IsDigit(nextToken.Value.First()); + return leftTokenIsDigit && rightTokenIsDigit; + } + + private static bool DoesDoubleUnderscoreBreak(TokenCursor cursor) + { + var singleUnderscoresCount = 0; + for (var i = cursor.Position + 1; i < cursor.TokenCount; i++) + { + var currentToken = cursor.tokens[i]; + if (currentToken.Type is TokenType.NewLine or TokenType.EndOfText) return false; + if (currentToken.Type == TokenType.DoubleUnderscore) return (singleUnderscoresCount % 2) == 1; + if (currentToken.Type != TokenType.Underscore) continue; + + var previousToken = cursor.tokens.ElementAtOrDefault(i - 1); + if (previousToken is { Type: TokenType.Escape }) continue; + + singleUnderscoresCount++; + } + + return false; + } +} \ No newline at end of file diff --git a/cs/Markdown/LineNode/INodeGenerator.cs b/cs/Markdown/Node/INodeGenerator.cs similarity index 62% rename from cs/Markdown/LineNode/INodeGenerator.cs rename to cs/Markdown/Node/INodeGenerator.cs index e50ed0dfe..7b74a1651 100644 --- a/cs/Markdown/LineNode/INodeGenerator.cs +++ b/cs/Markdown/Node/INodeGenerator.cs @@ -4,5 +4,5 @@ namespace Markdown; public interface INodeGenerator { - IEnumerable Create(IEnumerable tokens); + List Create(List tokens); } \ No newline at end of file diff --git a/cs/Markdown/LineNode/Node.cs b/cs/Markdown/Node/Node.cs similarity index 100% rename from cs/Markdown/LineNode/Node.cs rename to cs/Markdown/Node/Node.cs diff --git a/cs/Markdown/Node/NodeFactory.cs b/cs/Markdown/Node/NodeFactory.cs new file mode 100644 index 000000000..ca47093a1 --- /dev/null +++ b/cs/Markdown/Node/NodeFactory.cs @@ -0,0 +1,6 @@ +namespace Markdown; + +public class NodeFactory +{ + +} \ No newline at end of file diff --git a/cs/Markdown/Node/NodeGenerator.cs b/cs/Markdown/Node/NodeGenerator.cs new file mode 100644 index 000000000..ac5a06532 --- /dev/null +++ b/cs/Markdown/Node/NodeGenerator.cs @@ -0,0 +1,86 @@ +using Markdown; + +public sealed class RuleBasedNodeGenerator : INodeGenerator +{ + private readonly InlinePipeline _inline; + + public RuleBasedNodeGenerator() + { + // Инициализация конвейера: порядок — от более специализированных к общим. + _inline = new InlinePipeline(new INodeRule[] + { + new EscapeRule(), // экранирование [web:30] + new StrongUnderscoreRule(null!), // временно null — заменим после [web:34] + new EmphasisUnderscoreRule(null!),// временно null [web:34] + new PlainTextRule(), // текст по умолчанию [web:36] + new NewLineRule() // перенос строки [web:36] + }); + + // Внедряем ссылку на pipeline в правила, которые её требуют. + ReplacePipeline(_inline); + } + + private void ReplacePipeline(InlinePipeline pipeline) + { + foreach (var rule in pipeline + .GetType() + .GetField("_rules", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)! + .GetValue(pipeline) as List) + { + if (rule is StrongUnderscoreRule s && s.GetType().GetField("_inline", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance) != null) + s.GetType().GetField("_inline", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)!.SetValue(s, pipeline); // внедрение контекста [web:34]; + + if (rule is EmphasisUnderscoreRule e && e.GetType().GetField("_inline", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance) != null) + e.GetType().GetField("_inline", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)!.SetValue(e, pipeline); // внедрение [web:34]; + } + } + + public List Create(List tokens) + { + var currentHeader = new List(); // буфер строки + foreach (var t in tokens) + { + if (t.Type == TokenType.NewLine) + { + foreach (var node in FlushLine(currentHeader)) + yield return node; // сброс строки + yield return new Node { Type = NodeType.Plain, Value = "\n" }; // перенос + currentHeader.Clear(); // новая строка + continue; + } + + if (currentHeader.Count == 0 && t.Type == TokenType.HeaderMarker) + { + // Собираем до конца строки и обрабатываем заголовок отдельным правилом. + currentHeader.Add(t); // маркер [web:30]. + continue; + } + + currentHeader.Add(t); // копим токены строки [web:35]. + } + + // хвост + foreach (var node in FlushLine(currentHeader)) + yield return node; // финальная строка + } + + private IEnumerable FlushLine(List lineTokens) + { + if (lineTokens.Count == 0) + yield break; // пусто + + if (lineTokens[0].Type == TokenType.HeaderMarker) + { + var headerRule = new HeaderRule(_inline); // правило заголовка + int idx = 0; + var outNodes = new List(); // временный список + headerRule.Apply(lineTokens, ref idx, outNodes); // применяем + foreach (var n in outNodes) yield return n; // отдаём + } + else + { + foreach (var n in _inline.Parse(lineTokens)) + yield return n; // инлайны строки [web:34]. + } + } +} diff --git a/cs/Markdown/LineNode/NodeType.cs b/cs/Markdown/Node/NodeType.cs similarity index 100% rename from cs/Markdown/LineNode/NodeType.cs rename to cs/Markdown/Node/NodeType.cs diff --git a/cs/Markdown/Node/TokenCursor.cs b/cs/Markdown/Node/TokenCursor.cs new file mode 100644 index 000000000..ba646b136 --- /dev/null +++ b/cs/Markdown/Node/TokenCursor.cs @@ -0,0 +1,6 @@ +namespace Markdown; + +public class TokenCursor +{ + +} \ No newline at end of file diff --git a/cs/Markdown/Token/InputCursor.cs b/cs/Markdown/Token/TextCursor.cs similarity index 77% rename from cs/Markdown/Token/InputCursor.cs rename to cs/Markdown/Token/TextCursor.cs index 1c02ec1b8..fdd86c321 100644 --- a/cs/Markdown/Token/InputCursor.cs +++ b/cs/Markdown/Token/TextCursor.cs @@ -28,16 +28,6 @@ public char Peek(int offset = 1) return idx >= 0 && idx < input.Length ? input[idx] : '\0'; } - public bool StartsWith(string s) - { - if (s is null) return false; - if (Position + s.Length > input.Length) return false; - for (int i = 0; i < s.Length; i++) - if (input[Position + i] != s[i]) - return false; - return true; - } - public void Move(int count = 1) => Position = Math.Min(Position + count, input.Length); //todo: подумать, может лучше trymove diff --git a/cs/Markdown/TokenCreateRules/DigitRule.cs b/cs/Markdown/TokenCreateRules/DigitRule.cs new file mode 100644 index 000000000..02a9e76bd --- /dev/null +++ b/cs/Markdown/TokenCreateRules/DigitRule.cs @@ -0,0 +1,23 @@ +namespace Markdown; + +public class DigitRule : ITokenRule +{ + public Token? TryReadTokenAndMoveCursor(InputCursor cursor) + { + // Старт не цифра — токена нет + if (!char.IsDigit(cursor.Current) || cursor.End) + return null; + + var start = cursor.Position; + + while (!cursor.End) + { + if (!char.IsDigit(cursor.Current)) + break; + cursor.Move(1); + } + + var value = cursor.Slice(start, cursor.Position); + return TokenFactory.Create(TokenType.WhiteSpace, value, start); + } +} \ No newline at end of file diff --git a/cs/Markdown/Rules/EscapeRule.cs b/cs/Markdown/TokenCreateRules/EscapeRule.cs similarity index 100% rename from cs/Markdown/Rules/EscapeRule.cs rename to cs/Markdown/TokenCreateRules/EscapeRule.cs diff --git a/cs/Markdown/Rules/HeaderRule.cs b/cs/Markdown/TokenCreateRules/HeaderRule.cs similarity index 100% rename from cs/Markdown/Rules/HeaderRule.cs rename to cs/Markdown/TokenCreateRules/HeaderRule.cs diff --git a/cs/Markdown/Rules/ITokenRule.cs b/cs/Markdown/TokenCreateRules/ITokenRule.cs similarity index 100% rename from cs/Markdown/Rules/ITokenRule.cs rename to cs/Markdown/TokenCreateRules/ITokenRule.cs diff --git a/cs/Markdown/Rules/NewLineRule.cs b/cs/Markdown/TokenCreateRules/NewLineRule.cs similarity index 100% rename from cs/Markdown/Rules/NewLineRule.cs rename to cs/Markdown/TokenCreateRules/NewLineRule.cs diff --git a/cs/Markdown/Rules/TextRunRule.cs b/cs/Markdown/TokenCreateRules/TextRunRule.cs similarity index 59% rename from cs/Markdown/Rules/TextRunRule.cs rename to cs/Markdown/TokenCreateRules/TextRunRule.cs index eb0ee3026..6b0a79064 100644 --- a/cs/Markdown/Rules/TextRunRule.cs +++ b/cs/Markdown/TokenCreateRules/TextRunRule.cs @@ -4,19 +4,26 @@ public sealed class TextRunRule : ITokenRule { public Token TryReadTokenAndMoveCursor(InputCursor cursor) { - if (cursor.End) return null; - - if (cursor.Current == '_' || + if (cursor.End || + cursor.Current == '_' || cursor.Current == '\n' || cursor.Current == '\r' || - cursor.Current == '\\') + cursor.Current == '\\' || + cursor.Current == ' ' || + char.IsDigit(cursor.Current)) return null; var start = cursor.Position; while (!cursor.End) { var c = cursor.Current; - if (c == '_' || c == '\n' || c == '\r' || c == '\\') break; + if (c == '_' || + c == '\n' || + c == '\r' || + c == '\\' || + c == ' ' || + char.IsDigit(c)) + break; cursor.Move(1); } diff --git a/cs/Markdown/Rules/UnderscoreRule.cs b/cs/Markdown/TokenCreateRules/UnderscoreRule.cs similarity index 95% rename from cs/Markdown/Rules/UnderscoreRule.cs rename to cs/Markdown/TokenCreateRules/UnderscoreRule.cs index 97989aedc..d77e636e4 100644 --- a/cs/Markdown/Rules/UnderscoreRule.cs +++ b/cs/Markdown/TokenCreateRules/UnderscoreRule.cs @@ -19,7 +19,7 @@ public Token TryReadTokenAndMoveCursor(InputCursor cursor) if (count >= 2) { cursor.Revert(pos + 2); - cursor.Move(2); + // cursor.Move(2); return TokenFactory.Create(TokenType.DoubleUnderscore, "__", pos); } diff --git a/cs/Markdown/TokenCreateRules/WhiteSpaceRule.cs b/cs/Markdown/TokenCreateRules/WhiteSpaceRule.cs new file mode 100644 index 000000000..09e8e705d --- /dev/null +++ b/cs/Markdown/TokenCreateRules/WhiteSpaceRule.cs @@ -0,0 +1,25 @@ +namespace Markdown; + +public sealed class WhiteSpaceRule : ITokenRule +{ + public Token? TryReadTokenAndMoveCursor(InputCursor cursor) + { + // Не начинаем, если текущий символ не пробел/таб + if (cursor.Current != ' ' || cursor.End) + return null; + + // Копим подряд идущие пробелы/табы, но останавливаемся перед переводом строки + var start = cursor.Position; + + while (!cursor.End) + { + if (cursor.Current != ' ') + break; + cursor.Move(1); + } + + // Сдвигаем курсор и возвращаем токен пробелов как Text (или свой спецтип, если он у вас есть) + var value = cursor.Slice(start, cursor.Position); + return TokenFactory.Create(TokenType.WhiteSpace,value, start); + } +} \ No newline at end of file From 42dbb81a50eee7b4365a9c664b81247a51a59f4f Mon Sep 17 00:00:00 2001 From: MihailK Date: Mon, 17 Nov 2025 17:20:45 +0500 Subject: [PATCH 06/13] =?UTF-8?q?=D0=94=D0=BE=D0=B1=D0=B0=D0=B2=D0=B8?= =?UTF-8?q?=D0=BB=20=D0=BD=D0=BE=D0=B2=D1=8B=D0=B5=20=D1=82=D0=B8=D0=BF?= =?UTF-8?q?=D1=8B=20=D1=82=D0=BE=D0=BA=D0=B5=D0=BD=D0=BE=D0=B2=20=D0=B2=20?= =?UTF-8?q?=D0=B3=D0=B5=D0=BD=D0=B5=D1=80=D0=B0=D1=86=D0=B8=D1=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/Token/ITokenGenerator.cs | 2 +- cs/Markdown/Token/TextCursor.cs | 4 ++-- cs/Markdown/Token/Token.cs | 2 -- cs/Markdown/Token/TokenFactory.cs | 4 ++-- cs/Markdown/Token/TokenGenerator.cs | 21 +++++++++++++-------- cs/Markdown/Token/TokenType.cs | 4 +++- 6 files changed, 21 insertions(+), 16 deletions(-) diff --git a/cs/Markdown/Token/ITokenGenerator.cs b/cs/Markdown/Token/ITokenGenerator.cs index 75d1aed00..9a4e7a518 100644 --- a/cs/Markdown/Token/ITokenGenerator.cs +++ b/cs/Markdown/Token/ITokenGenerator.cs @@ -2,5 +2,5 @@ public interface ITokenGenerator { - IEnumerable Tokenize(string input); + List Tokenize(string text); } \ No newline at end of file diff --git a/cs/Markdown/Token/TextCursor.cs b/cs/Markdown/Token/TextCursor.cs index fdd86c321..304a179fa 100644 --- a/cs/Markdown/Token/TextCursor.cs +++ b/cs/Markdown/Token/TextCursor.cs @@ -2,14 +2,14 @@ namespace Markdown; -public class InputCursor +public class TextCursor { private readonly string input; public int Position { get; private set; } public int Length => input.Length; public bool End => Position >= input.Length; - public InputCursor(string input) + public TextCursor(string input) { this.input = input ?? string.Empty; Position = 0; diff --git a/cs/Markdown/Token/Token.cs b/cs/Markdown/Token/Token.cs index c99aaea15..9559d55af 100644 --- a/cs/Markdown/Token/Token.cs +++ b/cs/Markdown/Token/Token.cs @@ -6,6 +6,4 @@ public class Token { public TokenType Type; public required string Value { get; init; } - public int StartPos { get; init; } - public int EndPos => StartPos + Value.Length; } \ No newline at end of file diff --git a/cs/Markdown/Token/TokenFactory.cs b/cs/Markdown/Token/TokenFactory.cs index e35e3ec20..f0923c6b8 100644 --- a/cs/Markdown/Token/TokenFactory.cs +++ b/cs/Markdown/Token/TokenFactory.cs @@ -2,6 +2,6 @@ public static class TokenFactory { - public static Token Create(TokenType type, string info, int pos) - => new Token { Type = type, Value = info, StartPos = pos }; + public static Token Create(TokenType type, string? info, int pos) + => new Token { Type = type, Value = info }; } \ No newline at end of file diff --git a/cs/Markdown/Token/TokenGenerator.cs b/cs/Markdown/Token/TokenGenerator.cs index 6fc34264e..3a7268b98 100644 --- a/cs/Markdown/Token/TokenGenerator.cs +++ b/cs/Markdown/Token/TokenGenerator.cs @@ -7,27 +7,32 @@ public class TokenGenerator : ITokenGenerator private readonly List rules = new() { new NewLineRule(), + new WhiteSpaceRule(), new EscapeRule(), new HeaderRule(), new UnderscoreRule(), - new TextRunRule() + new TextRunRule(), + new DigitRule() }; - public IEnumerable Tokenize(string input) + public List Tokenize(string text) { - if (string.IsNullOrEmpty(input)) - yield break; + if (string.IsNullOrEmpty(text)) + return null; + var tokens = new List(); //todo: подумать про строку из пробелов - IsNullOfWhitespace - var cursor = new InputCursor(input); + var cursor = new TextCursor(text); while (!cursor.End) { - var isLineStart = true; - yield return TryCreateToken(cursor); + var token = TryCreateToken(cursor); + tokens.Add(token); } + tokens.Add(TokenFactory.Create(TokenType.EndOfText, null, text.Length)); + return tokens; } - private Token? TryCreateToken(InputCursor cursor) + private Token? TryCreateToken(TextCursor cursor) { Token? token = null; foreach (var rule in rules) diff --git a/cs/Markdown/Token/TokenType.cs b/cs/Markdown/Token/TokenType.cs index fd1477f90..be9596009 100644 --- a/cs/Markdown/Token/TokenType.cs +++ b/cs/Markdown/Token/TokenType.cs @@ -7,5 +7,7 @@ public enum TokenType DoubleUnderscore, HeaderMarker, NewLine, - Escape + Escape, + EndOfText, + WhiteSpace } \ No newline at end of file From 3368b75282de22f2e819b3165d30da73bc5eef09 Mon Sep 17 00:00:00 2001 From: MihailK Date: Mon, 17 Nov 2025 17:23:37 +0500 Subject: [PATCH 07/13] =?UTF-8?q?=D0=9F=D0=BE=D0=BC=D0=B5=D0=BD=D1=8F?= =?UTF-8?q?=D0=BB=20=D0=BF=D1=80=D0=B0=D0=B2=D0=B8=D0=BB=D0=B0=20=D0=B4?= =?UTF-8?q?=D0=BB=D1=8F=20=D1=82=D0=BE=D0=BA=D0=B5=D0=BD=D0=B8=D0=B7=D0=B0?= =?UTF-8?q?=D1=86=D0=B8=D0=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/TokenCreateRules/DigitRule.cs | 3 +-- cs/Markdown/TokenCreateRules/EscapeRule.cs | 2 +- cs/Markdown/TokenCreateRules/HeaderRule.cs | 6 +++--- cs/Markdown/TokenCreateRules/ITokenRule.cs | 2 +- cs/Markdown/TokenCreateRules/NewLineRule.cs | 2 +- cs/Markdown/TokenCreateRules/TextRunRule.cs | 2 +- cs/Markdown/TokenCreateRules/UnderscoreRule.cs | 3 +-- cs/Markdown/TokenCreateRules/WhiteSpaceRule.cs | 7 ++----- 8 files changed, 11 insertions(+), 16 deletions(-) diff --git a/cs/Markdown/TokenCreateRules/DigitRule.cs b/cs/Markdown/TokenCreateRules/DigitRule.cs index 02a9e76bd..b428fe31a 100644 --- a/cs/Markdown/TokenCreateRules/DigitRule.cs +++ b/cs/Markdown/TokenCreateRules/DigitRule.cs @@ -2,9 +2,8 @@ public class DigitRule : ITokenRule { - public Token? TryReadTokenAndMoveCursor(InputCursor cursor) + public Token? TryReadTokenAndMoveCursor(TextCursor cursor) { - // Старт не цифра — токена нет if (!char.IsDigit(cursor.Current) || cursor.End) return null; diff --git a/cs/Markdown/TokenCreateRules/EscapeRule.cs b/cs/Markdown/TokenCreateRules/EscapeRule.cs index fc5c2a1b0..5b365ded1 100644 --- a/cs/Markdown/TokenCreateRules/EscapeRule.cs +++ b/cs/Markdown/TokenCreateRules/EscapeRule.cs @@ -2,7 +2,7 @@ public sealed class EscapeRule : ITokenRule { - public Token? TryReadTokenAndMoveCursor(InputCursor cursor) + public Token? TryReadTokenAndMoveCursor(TextCursor cursor) { if (cursor.End) return null; if (cursor.Current != '\\') return null; diff --git a/cs/Markdown/TokenCreateRules/HeaderRule.cs b/cs/Markdown/TokenCreateRules/HeaderRule.cs index 2da52b900..d1596ae3f 100644 --- a/cs/Markdown/TokenCreateRules/HeaderRule.cs +++ b/cs/Markdown/TokenCreateRules/HeaderRule.cs @@ -2,15 +2,15 @@ public sealed class HeaderRule : ITokenRule { - public Token? TryReadTokenAndMoveCursor(InputCursor cursor) + public Token? TryReadTokenAndMoveCursor(TextCursor cursor) { if (!cursor.IsNewLine() || cursor.End) return null; if (cursor.Current == '#' && cursor.Peek() == ' ') { var currentPos = cursor.Position; - cursor.Move(2); - return TokenFactory.Create(TokenType.HeaderMarker, "# ", currentPos); + cursor.Move(1); + return TokenFactory.Create(TokenType.HeaderMarker, "#", currentPos); } return null; diff --git a/cs/Markdown/TokenCreateRules/ITokenRule.cs b/cs/Markdown/TokenCreateRules/ITokenRule.cs index 84690413e..8f80f815c 100644 --- a/cs/Markdown/TokenCreateRules/ITokenRule.cs +++ b/cs/Markdown/TokenCreateRules/ITokenRule.cs @@ -2,5 +2,5 @@ public interface ITokenRule { - Token? TryReadTokenAndMoveCursor(InputCursor cursor); + Token? TryReadTokenAndMoveCursor(TextCursor cursor); } \ No newline at end of file diff --git a/cs/Markdown/TokenCreateRules/NewLineRule.cs b/cs/Markdown/TokenCreateRules/NewLineRule.cs index 8fff82a31..c83e55374 100644 --- a/cs/Markdown/TokenCreateRules/NewLineRule.cs +++ b/cs/Markdown/TokenCreateRules/NewLineRule.cs @@ -2,7 +2,7 @@ public sealed class NewLineRule : ITokenRule { - public Token TryReadTokenAndMoveCursor(InputCursor cursor) + public Token TryReadTokenAndMoveCursor(TextCursor cursor) { if (cursor.End) return null; if (cursor.Current == '\n') diff --git a/cs/Markdown/TokenCreateRules/TextRunRule.cs b/cs/Markdown/TokenCreateRules/TextRunRule.cs index 6b0a79064..540f25e90 100644 --- a/cs/Markdown/TokenCreateRules/TextRunRule.cs +++ b/cs/Markdown/TokenCreateRules/TextRunRule.cs @@ -2,7 +2,7 @@ public sealed class TextRunRule : ITokenRule { - public Token TryReadTokenAndMoveCursor(InputCursor cursor) + public Token TryReadTokenAndMoveCursor(TextCursor cursor) { if (cursor.End || cursor.Current == '_' || diff --git a/cs/Markdown/TokenCreateRules/UnderscoreRule.cs b/cs/Markdown/TokenCreateRules/UnderscoreRule.cs index d77e636e4..65b637ac3 100644 --- a/cs/Markdown/TokenCreateRules/UnderscoreRule.cs +++ b/cs/Markdown/TokenCreateRules/UnderscoreRule.cs @@ -2,7 +2,7 @@ public sealed class UnderscoreRule : ITokenRule { - public Token TryReadTokenAndMoveCursor(InputCursor cursor) + public Token TryReadTokenAndMoveCursor(TextCursor cursor) { if (cursor.End) return null; if (cursor.Current != '_') return null; @@ -19,7 +19,6 @@ public Token TryReadTokenAndMoveCursor(InputCursor cursor) if (count >= 2) { cursor.Revert(pos + 2); - // cursor.Move(2); return TokenFactory.Create(TokenType.DoubleUnderscore, "__", pos); } diff --git a/cs/Markdown/TokenCreateRules/WhiteSpaceRule.cs b/cs/Markdown/TokenCreateRules/WhiteSpaceRule.cs index 09e8e705d..e3a308772 100644 --- a/cs/Markdown/TokenCreateRules/WhiteSpaceRule.cs +++ b/cs/Markdown/TokenCreateRules/WhiteSpaceRule.cs @@ -2,13 +2,11 @@ public sealed class WhiteSpaceRule : ITokenRule { - public Token? TryReadTokenAndMoveCursor(InputCursor cursor) + public Token? TryReadTokenAndMoveCursor(TextCursor cursor) { - // Не начинаем, если текущий символ не пробел/таб if (cursor.Current != ' ' || cursor.End) return null; - - // Копим подряд идущие пробелы/табы, но останавливаемся перед переводом строки + var start = cursor.Position; while (!cursor.End) @@ -18,7 +16,6 @@ public sealed class WhiteSpaceRule : ITokenRule cursor.Move(1); } - // Сдвигаем курсор и возвращаем токен пробелов как Text (или свой спецтип, если он у вас есть) var value = cursor.Slice(start, cursor.Position); return TokenFactory.Create(TokenType.WhiteSpace,value, start); } From 65629c0fe47e31e17b3443f02f9679579640d17e Mon Sep 17 00:00:00 2001 From: MihailK Date: Mon, 17 Nov 2025 17:25:01 +0500 Subject: [PATCH 08/13] =?UTF-8?q?=D0=A1=D0=B4=D0=B5=D0=BB=D0=B0=D0=BB=20?= =?UTF-8?q?=D0=BD=D0=B5=D0=B7=D0=B0=D0=BA=D0=BE=D0=BD=D1=87=D0=B5=D0=BD?= =?UTF-8?q?=D1=83=D1=8E=20=D0=B2=D0=B5=D1=80=D1=81=D0=B8=D1=8E=20=D0=B3?= =?UTF-8?q?=D0=B5=D0=BD=D0=B5=D1=80=D0=B0=D1=86=D0=B8=D0=B8=20=D0=BD=D0=BE?= =?UTF-8?q?=D0=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/MarkdownTest/RenderTest.cs | 133 ++++++-------- cs/Markdown/Md.cs | 9 +- cs/Markdown/Node/HandelProcessor.cs | 233 +------------------------ cs/Markdown/Node/INodeGenerator.cs | 6 +- cs/Markdown/Node/NodeFactory.cs | 3 +- cs/Markdown/Node/NodeGenerator.cs | 114 +++++------- cs/Markdown/Node/NodeType.cs | 2 +- cs/Markdown/Node/TokenCursor.cs | 22 ++- cs/Markdown/Rendering.cs | 91 +++++++++- 9 files changed, 211 insertions(+), 402 deletions(-) diff --git a/cs/Markdown/MarkdownTest/RenderTest.cs b/cs/Markdown/MarkdownTest/RenderTest.cs index 1b4bedcd5..5df47ee2c 100644 --- a/cs/Markdown/MarkdownTest/RenderTest.cs +++ b/cs/Markdown/MarkdownTest/RenderTest.cs @@ -8,112 +8,79 @@ public class RenderTest { private readonly Md md = new Md(); - [Test] - public void Проверка_экранирования() - { - var text = "\\_Вот это\\_"; - var html = md.Render(text); - - html.Should().Be("_Вот это_"); - } - - [Test] - public void Курсив_внутри_жирного() - { - var text = "__a _b_ c__"; - var html = md.Render(text); - - html.Should().Be("a b c"); - } - - [Test] - public void Жирный_внутри_курсива() - { - var text = "_a __b__ c_"; - var html = md.Render(text); - - html.Should().Be("a __b__ c"); - } - - [Test] - public void Неправильное_прилипание() - { - var text = "_ a_ bbb _a _"; - var html = md.Render(text); - html.Should().Be("_ a_ bbb _a _"); - } - - [Test] - public void Renders_Strong_With_Double_Underscore() + [TestCase("_курсив_", "курсив", TestName = "Просто курсив")] + [TestCase("__полужирный__", "полужирный", TestName = "Просто полужирный")] + [TestCase("_это просто текст_", "это просто текст", TestName = "Просто текст с курсивом")] + [TestCase("__a _b_ c__", "a b c", TestName = "Курсив внутри жирного должен работать")] + public void Проверка_ПравильногоВыделения(string text, string expected) { - var text = "Это __жирный__ текст"; var html = md.Render(text); - - html.Should().Be("Это жирный текст"); + html.Should().Be(expected); } - [Test] - public void Renders_Emphasis_With_Single_Underscore_When_Not_Inside_Word() + [TestCase("__неправильное выделение_", "__неправильное выделение_", TestName = "Разные символы выделения")] + [TestCase("_неправильное выделение__", "_неправильное выделение__", TestName = "Разные символы выделения")] + [TestCase("_a __b b__ c_", "a __b b__ c", TestName = "Жирный внутри курсива не должен работать")] + [TestCase("_ a_ bbb _a _", "_ a_ bbb _a _", TestName = "Неправильное прилипание не выделяет")] + [TestCase("__пересечения _двойных__ и одинарных_", "__пересечения _двойных__ и одинарных_", TestName = "Пересечение разных выделений")] + public void Проверка_НеправильногоВыделения(string text, string expected) { - var text = "Текст, _окруженный с двух сторон_ одинарными символами подчерка"; var html = md.Render(text); - - html.Should().Be("Текст, окруженный с двух сторон одинарными символами подчерка"); + html.Should().Be(expected); } - [Test] - public void Does_Not_Emphasize_Single_Underscore_Inside_Word() + [TestCase("текст c цифрами_12_3", "текст c цифрами_12_3", TestName = "Подчерки в тексте с цифрами")] + [TestCase("вну_три", "вну_три", TestName = "Подчерк внутри слова")] + [TestCase("в__нутр__и сл_о_ва", "внутри слова", + TestName = "Выделение внутри слова работает")] + [TestCase("в __нач__але _сло_ва", "в начале слова", + TestName = "Выделение в начале слова работает")] + [TestCase("в ко__нце__ сло_ва_", "в конце слова", + TestName = "Выделение в конце слова работает")] + [TestCase("в ра_зных сл_овах", "в ра_зных сл_овах", TestName = "Выделение в разных словах не работает")] + [TestCase("эти_ подчерки_ не считаются выделением", "эти_ подчерки_ не считаются выделением", + TestName = "Пробельный символ после подчерка")] + public void Выделение_внутри_слова(string text, string expected) { - var text = "вну_три"; var html = md.Render(text); - - html.Should().Be("вну_три"); + html.Should().Be(expected); } - [Test] - public void Renders_Header_H1_For_Line_Starting_With_Hash_Space() - { - var text = "# Заголовок"; - var html = md.Render(text); - - html.Should().Be("

Заголовок

"); - } - [Test] - public void Keeps_Newline_After_Header_And_Renders_Inline_Text_On_Next_Line() + [TestCase("# Заголовок", "

Заголовок

", TestName = "Заголовок")] + [TestCase("# Заголовок\n_курсив_ и __жирный__ и вну_три", + "

Заголовок

\nкурсив и жирный и вну_три", + TestName = "Сохраняется_перенос_после_заголовка_и_инлайн_на_следующей_строке")] + [TestCase("# Заголовок1\n_курсив_ и __жирный__ и вну_три\n# Заголовок2\nкапибара", + "

Заголовок1

\nкурсив и жирный и вну_три\n

Заголовок2

\nкапибара", + TestName = "Несколько заголовков в одном тексте")] + [TestCase("# Заголовок\nпросто текст", + "

Заголовок

\nпросто текст", + TestName = "Заголовок и перенос строки")] + [TestCase("# Это заголовок c # внутри", "

Это заголовок c # внутри

", TestName = "Решётка внутри заголовка")] + public void ВерныйЗаголовок(string text, string expected) { - var text = "# Заголовок\n_курсив_ и __жирный__ и вну_три"; var html = md.Render(text); - - html.Should().Be("

Заголовок

\nкурсив и жирный и вну_три"); + html.Should().Be(expected); } - [Test] - public void Supports_Multiple_Headers_In_One_Text() + [TestCase("#Это не заголовок", "#Это не заголовок", TestName = "Нет пробела после #")] + [TestCase("Это # не заголовок", "Это # не заголовок", TestName = "# внутри текста не заголовок")] + [TestCase("Это не заголовок #", "Это не заголовок #", TestName = "# после текста не заголовок")] + public void НеверныйЗаголовок(string text, string expected) { - var text = "# Заголовок1\n_курсив_ и __жирный__ и вну_три\n# Заголовок2\nкапибара"; var html = md.Render(text); - - html.Should() - .Be( - "

Заголовок1

\nкурсив и жирный и вну_три\n

Заголовок2

\nкапибара"); - } - - public static IEnumerable StrongCases() - { - yield return new object[] { "__a__", "a" }; - yield return new object[] { "____", "" }; - yield return - new object[] { "___a__", "_a" }; // одинарное останется как текст + html.Should().Be(expected); } - public static IEnumerable EmphasisCases() + [TestCase("\\_это не работает_", "_это не работает_", TestName = "1111")] + [TestCase("\\\\_это работает_", "\\это работает", + TestName = "111")] + public void Экранирование(string text, string expected) { - yield return new object[] { "_a_", "a" }; - yield return new object[] { "a_b", "a_b" }; // внутри слова не срабатывает - yield return - new object[] { "_ a_", "_ a_" }; // открывающее слитно с пробелом — остаётся как текст + var html = md.Render(text); + html.Should().Be(expected); } } \ No newline at end of file diff --git a/cs/Markdown/Md.cs b/cs/Markdown/Md.cs index b02181d30..922c26bb4 100644 --- a/cs/Markdown/Md.cs +++ b/cs/Markdown/Md.cs @@ -1,16 +1,11 @@ -using System.Text; - -namespace Markdown; - -using System; -using System.Collections.Generic; -using System.Linq; +namespace Markdown; public class Md { private readonly TokenGenerator tokenGenerator; private readonly NodeGenerator nodeGenerator; private readonly HtmlRenderer htmlRenderer; + public Md() { tokenGenerator = new TokenGenerator(); diff --git a/cs/Markdown/Node/HandelProcessor.cs b/cs/Markdown/Node/HandelProcessor.cs index 003e6b2a6..f6e9f9332 100644 --- a/cs/Markdown/Node/HandelProcessor.cs +++ b/cs/Markdown/Node/HandelProcessor.cs @@ -1,6 +1,6 @@ namespace Markdown; -public static class Handel +public static class HandelProcessor { public static void Escape(List children, TokenCursor tokenCursor) { @@ -22,236 +22,9 @@ public static void Escape(List children, TokenCursor tokenCursor) tokenCursor.Move(); } - - public static void Underscores( - List children, - List<(TokenType Type, int ChildrenIndex)> underscores, - TokenCursor tokenCursor) - { - var currentToken = tokenCursor.Current; - var previousToken = tokenCursor.tokens.ElementAtOrDefault(tokenCursor.Position - 1); - var nextToken = tokenCursor.tokens.ElementAtOrDefault(tokenCursor.Position + 1); - - if (IsUnderscoreBetweenDigits(previousToken, nextToken)) // между числами - { - AddUnderscoreAsLiteral(children, currentToken.Value, tokenCursor); - return; - } - - if (!IsTokenWhitespaceLike(previousToken)) // слева пробел или граница - { - if (TryCloseExistingHighlight(underscores, currentToken, children, tokenCursor)) - return; - } - - - if (IsTokenWhitespaceLike(nextToken) || - (currentToken.Type == TokenType.DoubleUnderscore && DoesDoubleUnderscoreBreak(tokenCursor))) - { - AddUnderscoreAsLiteral(children, currentToken.Value, tokenCursor); - } - else - { - underscores.Add((currentToken.Type, children.Count)); - tokenCursor.Move(); - } - } - - public static void InsertUnmatchedUnderscores(List children, - List<(TokenType Type, int ChildrenIndex)> underscores) - { - for (var i = underscores.Count - 1; i >= 0; i--) - { - var (type, index) = underscores[i]; - var literal = type == TokenType.DoubleUnderscore ? "__" : "_"; - children.Insert(index, NodeFactory.Create(NodeType.Text, literal, null)); - } - } - - private static bool TryCloseExistingHighlight( - List<(TokenType Type, int ChildrenIndex)> underscores, - Token currentToken, - List children, - TokenCursor cursor) - { - var highlightingType = currentToken.Type; - var openerIndex = FindMatchingOpenerIndex(underscores, highlightingType); - if (openerIndex < 0) - return false; - - var openerUnderscore = underscores[openerIndex]; - var startIndex = openerUnderscore.ChildrenIndex; - var innerTokensCount = children.Count - startIndex; - - if (innerTokensCount == 0) - return false; - - var innerTokens = children.GetRange(startIndex, innerTokensCount); - - if (!IsValidHighlighting(underscores, highlightingType, openerUnderscore, innerTokens)) - return false; - - if (HasIntersection(underscores, openerUnderscore, highlightingType, children, openerIndex, out var innerIndex)) - { - InsertIntersection(children, underscores, openerUnderscore, underscores[innerIndex], openerIndex, - innerIndex); - AddUnderscoreAsLiteral(children, currentToken.Value, cursor); - return true; - } - - CloseHighlight(children, underscores, openerIndex, highlightingType, innerTokens, startIndex); - cursor.Move(); - return true; - } - - private static int FindMatchingOpenerIndex(List<(TokenType Type, int ChildrenIndex)> underscores, TokenType type) - { - for (var i = underscores.Count - 1; i >= 0; i--) - { - if (underscores[i].Type == type) - return i; - } - - return -1; - } - - private static bool IsValidHighlighting( - List<(TokenType Type, int ChildrenIndex)> underscores, - TokenType highlightingType, - (TokenType Type, int ChildrenIndex) openerUnderscore, - List innerTokens) - { - if (highlightingType != TokenType.Underscore) - return true; - //точно "_" - var highlightingIsInsideDoubleUnderscore = underscores.Any(underscore => - underscore.Type == TokenType.DoubleUnderscore && - underscore.ChildrenIndex < openerUnderscore.ChildrenIndex); // "_" внутри двойного - - var areThereAnyWhitespaces = HasWhiteSpaceInPlainNodes(innerTokens); // Есть пробелы - return highlightingIsInsideDoubleUnderscore - || !areThereAnyWhitespaces; - } - - private static bool HasWhiteSpaceInPlainNodes(List innerTokens) - { - if (innerTokens == null || innerTokens.Count == 0) - return false; - - foreach (var t in innerTokens) - { - if (t.Type == NodeType.Text && t.Value != null) - { - var s = t.Value; - for (int j = 0; j < s.Length; j++) - { - if (char.IsWhiteSpace(s[j])) - return true; - } - } - } - - return false; - } - - private static bool HasIntersection( - List<(TokenType Type, int ChildrenIndex)> underscores, - (TokenType Type, int ChildrenIndex) opener, - TokenType highlightingType, - List children, - int openerIndex, - out int intersectionIndex) - { - for (var i = openerIndex + 1; i < underscores.Count; i++) - { - if (underscores[i].Type == highlightingType || - underscores[i].ChildrenIndex <= opener.ChildrenIndex || - underscores[i].ChildrenIndex >= children.Count) - continue; - - intersectionIndex = i; - return true; - } - - intersectionIndex = -1; - return false; - } - - private static void CloseHighlight( - List children, - List<(TokenType Type, int ChildrenIndex)> underscores, - int openerIndex, - TokenType highlightingType, - List innerTokens, - int startIndex) - { - children.RemoveRange(startIndex, innerTokens.Count); - - var node = highlightingType == TokenType.DoubleUnderscore - ? NodeFactory.Create(NodeType.Strong, null, innerTokens) - : NodeFactory.Create(NodeType.Emphasis, null, innerTokens); - - children.Add(node); - underscores.RemoveAt(openerIndex); - } - - private static void InsertIntersection( - List children, - List<(TokenType Type, int ChildrenIndex)> underscores, - (TokenType Type, int ChildrenIndex) opener, - (TokenType Type, int ChildrenIndex) inner, - int openerIndex, int innerIndex) - { - var innerLiteral = inner.Type == TokenType.DoubleUnderscore ? "__" : "_"; - var openerLiteral = opener.Type == TokenType.DoubleUnderscore ? "__" : "_"; - - children.Insert(inner.ChildrenIndex, NodeFactory.Create(NodeType.Text, innerLiteral, null)); - children.Insert(inner.ChildrenIndex, NodeFactory.Create(NodeType.Text, openerLiteral, null)); - - underscores.RemoveAt(innerIndex); - underscores.RemoveAt(openerIndex); - } - - private static void AddUnderscoreAsLiteral(List children, string value, TokenCursor cursor) - { - children.Add(NodeFactory.Create(NodeType.Text, value, null)); - cursor.Move(); - } - - private static bool IsTokenWhitespaceLike(Token? token) - { - return token == null || - token.Type == TokenType.WhiteSpace || - token.Type == TokenType.NewLine || - token.Type == TokenType.EndOfText; - } - - private static bool IsUnderscoreBetweenDigits(Token? previousToken, Token? nextToken) - { - var leftTokenIsDigit = previousToken is { Type: TokenType.Text, Value.Length: > 0 } && - char.IsDigit(previousToken.Value.Last()); - var rightTokenIsDigit = nextToken is { Type: TokenType.Text, Value.Length: > 0 } && - char.IsDigit(nextToken.Value.First()); - return leftTokenIsDigit && rightTokenIsDigit; - } - - private static bool DoesDoubleUnderscoreBreak(TokenCursor cursor) + public static void Score(List children, TokenCursor tokenCursor) { - var singleUnderscoresCount = 0; - for (var i = cursor.Position + 1; i < cursor.TokenCount; i++) - { - var currentToken = cursor.tokens[i]; - if (currentToken.Type is TokenType.NewLine or TokenType.EndOfText) return false; - if (currentToken.Type == TokenType.DoubleUnderscore) return (singleUnderscoresCount % 2) == 1; - if (currentToken.Type != TokenType.Underscore) continue; - - var previousToken = cursor.tokens.ElementAtOrDefault(i - 1); - if (previousToken is { Type: TokenType.Escape }) continue; - - singleUnderscoresCount++; - } - - return false; + throw new NotImplementedException(); } } \ No newline at end of file diff --git a/cs/Markdown/Node/INodeGenerator.cs b/cs/Markdown/Node/INodeGenerator.cs index 7b74a1651..ee761d675 100644 --- a/cs/Markdown/Node/INodeGenerator.cs +++ b/cs/Markdown/Node/INodeGenerator.cs @@ -1,8 +1,6 @@ -using System.Collections.Generic; - -namespace Markdown; +namespace Markdown; public interface INodeGenerator { - List Create(List tokens); + public List Create(List tokens); } \ No newline at end of file diff --git a/cs/Markdown/Node/NodeFactory.cs b/cs/Markdown/Node/NodeFactory.cs index ca47093a1..2656277a0 100644 --- a/cs/Markdown/Node/NodeFactory.cs +++ b/cs/Markdown/Node/NodeFactory.cs @@ -2,5 +2,6 @@ public class NodeFactory { - + public static Node Create(NodeType type, string? info, List? children) + => new Node { Type = type, Value = info, Children = children }; } \ No newline at end of file diff --git a/cs/Markdown/Node/NodeGenerator.cs b/cs/Markdown/Node/NodeGenerator.cs index ac5a06532..e85e956e6 100644 --- a/cs/Markdown/Node/NodeGenerator.cs +++ b/cs/Markdown/Node/NodeGenerator.cs @@ -1,86 +1,60 @@ -using Markdown; +namespace Markdown; -public sealed class RuleBasedNodeGenerator : INodeGenerator +public class NodeGenerator : INodeGenerator { - private readonly InlinePipeline _inline; - - public RuleBasedNodeGenerator() - { - // Инициализация конвейера: порядок — от более специализированных к общим. - _inline = new InlinePipeline(new INodeRule[] - { - new EscapeRule(), // экранирование [web:30] - new StrongUnderscoreRule(null!), // временно null — заменим после [web:34] - new EmphasisUnderscoreRule(null!),// временно null [web:34] - new PlainTextRule(), // текст по умолчанию [web:36] - new NewLineRule() // перенос строки [web:36] - }); - - // Внедряем ссылку на pipeline в правила, которые её требуют. - ReplacePipeline(_inline); - } - - private void ReplacePipeline(InlinePipeline pipeline) + public List Create(List tokens) { - foreach (var rule in pipeline - .GetType() - .GetField("_rules", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)! - .GetValue(pipeline) as List) - { - if (rule is StrongUnderscoreRule s && s.GetType().GetField("_inline", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance) != null) - s.GetType().GetField("_inline", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)!.SetValue(s, pipeline); // внедрение контекста [web:34]; - - if (rule is EmphasisUnderscoreRule e && e.GetType().GetField("_inline", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance) != null) - e.GetType().GetField("_inline", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)!.SetValue(e, pipeline); // внедрение [web:34]; - } + var tokenCursor = new TokenCursor(tokens); + var nods = new List(); + while (!tokenCursor.End) + nods.Add(CreateNode(tokenCursor)); + return nods; } - public List Create(List tokens) + private static Node CreateNode(TokenCursor tokenCursor) { - var currentHeader = new List(); // буфер строки - foreach (var t in tokens) + switch (tokenCursor.Current) { - if (t.Type == TokenType.NewLine) + case { Type: TokenType.NewLine }: { - foreach (var node in FlushLine(currentHeader)) - yield return node; // сброс строки - yield return new Node { Type = NodeType.Plain, Value = "\n" }; // перенос - currentHeader.Clear(); // новая строка - continue; + var newLineNode = NodeFactory.Create(NodeType.Text, tokenCursor.Current.Value, null); + tokenCursor.Move(); + return newLineNode; } - - if (currentHeader.Count == 0 && t.Type == TokenType.HeaderMarker) - { - // Собираем до конца строки и обрабатываем заголовок отдельным правилом. - currentHeader.Add(t); // маркер [web:30]. - continue; - } - - currentHeader.Add(t); // копим токены строки [web:35]. + case { Type: TokenType.HeaderMarker } when tokenCursor.TakeNext() is { Type: TokenType.WhiteSpace }: + tokenCursor.Move(2); + return NodeFactory.Create(NodeType.Header, null, GetChildrenNods(tokenCursor)); + default: + return NodeFactory.Create(NodeType.Text, null, GetChildrenNods(tokenCursor)); } - - // хвост - foreach (var node in FlushLine(currentHeader)) - yield return node; // финальная строка } - private IEnumerable FlushLine(List lineTokens) + private static List GetChildrenNods(TokenCursor tokenCursor) { - if (lineTokens.Count == 0) - yield break; // пусто - - if (lineTokens[0].Type == TokenType.HeaderMarker) - { - var headerRule = new HeaderRule(_inline); // правило заголовка - int idx = 0; - var outNodes = new List(); // временный список - headerRule.Apply(lineTokens, ref idx, outNodes); // применяем - foreach (var n in outNodes) yield return n; // отдаём - } - else + var children = new List(); + while (!tokenCursor.End && + tokenCursor.Current is not { Type: TokenType.NewLine }) { - foreach (var n in _inline.Parse(lineTokens)) - yield return n; // инлайны строки [web:34]. + switch (tokenCursor.Current!.Type) + { + case TokenType.Underscore: + case TokenType.DoubleUnderscore: + HandelProcessor.Score(children, tokenCursor); + break; + case TokenType.Escape: + HandelProcessor.Escape(children, tokenCursor); + break; + case TokenType.NewLine: + case TokenType.EndOfText: + case TokenType.Text: + case TokenType.WhiteSpace: + case TokenType.HeaderMarker: + default: + children.Add(NodeFactory.Create(NodeType.Text, tokenCursor.Current.Value, null)); + tokenCursor.Move(); + break; + } } + return children; } -} +} \ No newline at end of file diff --git a/cs/Markdown/Node/NodeType.cs b/cs/Markdown/Node/NodeType.cs index a34f14a06..dceaa1501 100644 --- a/cs/Markdown/Node/NodeType.cs +++ b/cs/Markdown/Node/NodeType.cs @@ -2,7 +2,7 @@ public enum NodeType { - Plain, // обычная строка + Text, // обычная строка Header, // заголовок Emphasis, // целиком строка в _..._ Strong, // целиком строка в __...__ diff --git a/cs/Markdown/Node/TokenCursor.cs b/cs/Markdown/Node/TokenCursor.cs index ba646b136..43e9958d0 100644 --- a/cs/Markdown/Node/TokenCursor.cs +++ b/cs/Markdown/Node/TokenCursor.cs @@ -1,6 +1,24 @@ namespace Markdown; -public class TokenCursor +public sealed class TokenCursor(List tokens) { - + public List tokens {get; } = tokens ?? throw new ArgumentNullException(nameof(tokens)); + public int Position { get; private set; } = 0; + + public readonly int TokenCount = tokens.Count; + public bool End => Position >= tokens.Count - 1; + public Token? Current => End ? null : tokens[Position]; + + public void Move(int offset = 1) + { + if (Position + offset < tokens.Count) + Position += offset; + else + throw new IndexOutOfRangeException(); + } + + public Token? TakeNext() + { + return Position + 1 < TokenCount ? tokens[Position + 1] : null; + } } \ No newline at end of file diff --git a/cs/Markdown/Rendering.cs b/cs/Markdown/Rendering.cs index 6e41574b9..5d7d5d7fb 100644 --- a/cs/Markdown/Rendering.cs +++ b/cs/Markdown/Rendering.cs @@ -1,16 +1,99 @@ -using System.Text; +using System.Net; +using System.Text; +using System.Collections.Generic; +using System.Linq; +using System.Net; namespace Markdown; public interface IRenderer { - string Render(IEnumerable lineNodes); + string Render(IEnumerable nodes); } public class HtmlRenderer : IRenderer { - public string Render(IEnumerable lines) + public string Render(IEnumerable nodes) { - throw new NotImplementedException(); + if (nodes == null) return string.Empty; + + var sb = new StringBuilder(); + + // Идём последовательно по узлам верхнего уровня + foreach (var node in nodes) + { + RenderNode(node, sb); + } + + return sb.ToString(); + } + + private static void RenderNode(Node node, StringBuilder sb) + { + if (node == null) return; + + switch (node.Type) + { + case NodeType.Header: + // В соответствии с условием: абзац, начинающийся с "# ", ->

...

+ // Дети содержат инлайны (Plain/Emphasis/Strong/Link) + sb.Append("

"); + foreach (var child in node.Children) + RenderNode(child, sb); + sb.Append("

"); + break; + + case NodeType.Strong: + sb.Append(""); + if (node.Children != null) + foreach (var child in node.Children) + RenderNode(child, sb); + else + sb.Append(node.Value); + sb.Append(""); + break; + + case NodeType.Emphasis: + sb.Append(""); + if (node.Children != null) + foreach (var child in node.Children) + RenderNode(child, sb); + else + sb.Append(node.Value); + sb.Append(""); + break; + + case NodeType.Link: + // В данной постановке правила ссылок не описаны подробно. + // Предположим семантику: Value = href, Children = текст ссылки. + // Если Children пусты — рендерим сам href как текст. + var href = WebUtility.HtmlEncode(node.Value ?? string.Empty); + sb.Append(""); + if (node.Children != null && node.Children.Count > 0) + RenderInlineChildren(node.Children, sb); + else + sb.Append(href); + sb.Append(""); + break; + + case NodeType.Text: + default: + // Текстовые узлы: экранируем + var text = WebUtility.HtmlEncode(node.Value ?? string.Empty); + sb.Append(text); + // Если у Plain есть дети (на всякий случай) — тоже обойти + if (node.Children != null && node.Children.Count > 0) + RenderInlineChildren(node.Children, sb); + break; + } + } + + private static void RenderInlineChildren(List children, System.Text.StringBuilder sb) + { + if (children == null || children.Count == 0) return; + + // Соседние Plain можно слить ещё на этапе генерации; здесь просто обходим + foreach (var child in children) + RenderNode(child, sb); } } \ No newline at end of file From 0aed9a7d5b372fbb131f2d5bc0a56771d4478c31 Mon Sep 17 00:00:00 2001 From: MihailK Date: Mon, 17 Nov 2025 22:50:30 +0500 Subject: [PATCH 09/13] =?UTF-8?q?=D0=94=D0=BE=D0=B4=D0=B5=D0=BB=D0=B0?= =?UTF-8?q?=D0=BB=20=D0=BE=D0=B1=D1=80=D0=B0=D0=B1=D0=BE=D1=82=D0=BA=D1=83?= =?UTF-8?q?=20=D0=BF=D0=BE=D0=B4=D1=87=D1=91=D1=80=D0=BA=D0=B8=D0=B2=D0=B0?= =?UTF-8?q?=D0=BD=D0=B8=D0=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/Node/HandelProcessor.cs | 170 +++++++++++++++++++++++++++- cs/Markdown/Node/NodeGenerator.cs | 1 + cs/Markdown/Node/TokenCursor.cs | 5 +- 3 files changed, 171 insertions(+), 5 deletions(-) diff --git a/cs/Markdown/Node/HandelProcessor.cs b/cs/Markdown/Node/HandelProcessor.cs index f6e9f9332..96e682073 100644 --- a/cs/Markdown/Node/HandelProcessor.cs +++ b/cs/Markdown/Node/HandelProcessor.cs @@ -25,6 +25,174 @@ public static void Escape(List children, TokenCursor tokenCursor) public static void Score(List children, TokenCursor tokenCursor) { - throw new NotImplementedException(); + var currentToken = tokenCursor.Current; + var previousToken = tokenCursor.tokens.ElementAtOrDefault(tokenCursor.Position - 1); + var nextToken = tokenCursor.tokens.ElementAtOrDefault(tokenCursor.Position + 1); + + if (TokenBetweenDigits(previousToken, nextToken) || ItWhiteSpaceOrEndToken(nextToken)) + { + children.Add(NodeFactory.Create(NodeType.Text, currentToken.Value, null)); + tokenCursor.Move(); + return; + } + + var currentTokenType = currentToken.Type; + var currentTokenPos = tokenCursor.Position; + var currentNodeType = currentTokenType == TokenType.DoubleUnderscore ? NodeType.Strong : NodeType.Emphasis; + + var innerScore = new List<(int Index, TokenType Type)>(); + var closeIndex = -1; + + for (var i = currentTokenPos + 1; i < tokenCursor.TokenCount; i++) + { + var t = tokenCursor.tokens[i]; + + + if (t.Type is TokenType.NewLine or TokenType.EndOfText) + break; + + if (t.Type == currentTokenType) + { + var left = tokenCursor.tokens.ElementAtOrDefault(i - 1); + var right = tokenCursor.tokens.ElementAtOrDefault(i + 1); + if (left?.Type != TokenType.WhiteSpace && + right?.Type is TokenType.WhiteSpace or TokenType.EndOfText) + { + closeIndex = i; + break; + } + } + + if (t.Type == TokenType.Underscore || t.Type == TokenType.DoubleUnderscore) + innerScore.Add((i, t.Type)); + } + + if (closeIndex < 0) + { + for (var i = currentTokenPos + 1; i < tokenCursor.TokenCount; i++) + { + var token = tokenCursor.tokens[i]; + if (token.Type == TokenType.WhiteSpace) + { + children.Add(NodeFactory.Create(NodeType.Text, currentToken.Value, null)); + tokenCursor.Move(); + return; + } + + if (token.Type == currentTokenType) + { + closeIndex = i; + break; + } + } + + if (closeIndex < 0) + { + children.Add(NodeFactory.Create(NodeType.Text, currentToken.Value, null)); + tokenCursor.Move(); + return; + } + + var nodes = GetChildren(tokenCursor, currentTokenPos + 1, closeIndex); + children.Add(NodeFactory.Create(currentNodeType, null, nodes)); + tokenCursor.Move(closeIndex - currentTokenPos + 1); + return; + } + + var start = currentTokenPos + 1; + var end = closeIndex; + + if (HasCrossingPairs(closeIndex, innerScore, tokenCursor)) + { + children.Add(NodeFactory.Create(NodeType.Text, currentToken.Value, null)); + children.Add(NodeFactory.Create(NodeType.Text, null, GetChildren(tokenCursor, start, end))); + children.Add(NodeFactory.Create(NodeType.Text, currentToken.Value, null)); + tokenCursor.Move(closeIndex - currentTokenPos + 1); + return; + } + + switch (currentTokenType) + { + case TokenType.Underscore: + { + var hasDoubleInside = innerScore.Any(u => + u.Type == TokenType.DoubleUnderscore && u.Index > start && u.Index < end); + if (hasDoubleInside) + { + var node = NodeFactory.Create(NodeType.Emphasis, null, GetChildren(tokenCursor, start, end)); + children.Add(node); + } + else + { + var node = NodeFactory.Create(NodeType.Emphasis, null, GetChildren(tokenCursor, start, end)); + children.Add(node); + } + + tokenCursor.Move(closeIndex - currentTokenPos + 1); + return; + } + case TokenType.DoubleUnderscore: + { + var childTokens = tokenCursor.tokens.Slice(currentTokenPos + 1, closeIndex - currentTokenPos); + var childNode = new NodeGenerator().Create(childTokens); + children.Add(NodeFactory.Create(currentNodeType, null, childNode)); + tokenCursor.Move(end - currentTokenPos + 1); + break; + } + } + } + + private static bool HasCrossingPairs(int closeIndex, + List<(int Index, TokenType Type)> innerUnderscores, + TokenCursor cursor) + { + var scoreType = cursor.Current.Type == TokenType.Underscore? TokenType.DoubleUnderscore : TokenType.Underscore; + foreach (var u in innerUnderscores.Where(x => x.Type == scoreType)) + { + for (var j = u.Index + 1; j < cursor.TokenCount; j++) + { + var t = cursor.tokens[j]; + if (t.Type is TokenType.NewLine or TokenType.EndOfText) + break; + if (t.Type == scoreType) + { + var left = cursor.tokens.ElementAtOrDefault(j - 1); + if (left?.Type != TokenType.WhiteSpace) + { + if (j > closeIndex) + return true; + break; + } + } + } + } + + return false; + } + + private static List GetChildren(TokenCursor cursor, int start, int end) + { + var slice = new List(); + for (int i = start; i < end; i++) + { + var t = cursor.tokens[i]; + slice.Add(NodeFactory.Create(NodeType.Text, t.Value, null)); + } + + return slice; + } + + private static bool ItWhiteSpaceOrEndToken(Token? token) + { + return token is { Type: TokenType.WhiteSpace } or { Type: TokenType.EndOfText }; + } + + private static bool TokenBetweenDigits(Token? previousToken, Token? nextToken) + { + var leftTokenIsDigit = previousToken is { Type: TokenType.Text, Value.Length: > 0 } && + char.IsDigit(previousToken.Value.Last()); + var rightTokenIsDigit = nextToken is { Type: TokenType.Text, Value.Length: > 0 } && + char.IsDigit(nextToken.Value.First()); + return leftTokenIsDigit && rightTokenIsDigit; } } \ No newline at end of file diff --git a/cs/Markdown/Node/NodeGenerator.cs b/cs/Markdown/Node/NodeGenerator.cs index e85e956e6..c6c62e898 100644 --- a/cs/Markdown/Node/NodeGenerator.cs +++ b/cs/Markdown/Node/NodeGenerator.cs @@ -55,6 +55,7 @@ private static List GetChildrenNods(TokenCursor tokenCursor) break; } } + return children; } } \ No newline at end of file diff --git a/cs/Markdown/Node/TokenCursor.cs b/cs/Markdown/Node/TokenCursor.cs index 43e9958d0..79d477342 100644 --- a/cs/Markdown/Node/TokenCursor.cs +++ b/cs/Markdown/Node/TokenCursor.cs @@ -17,8 +17,5 @@ public void Move(int offset = 1) throw new IndexOutOfRangeException(); } - public Token? TakeNext() - { - return Position + 1 < TokenCount ? tokens[Position + 1] : null; - } + public Token? TakeNext() => Position + 1 < TokenCount ? tokens[Position + 1] : null; } \ No newline at end of file From 9a48d4a83e2e8a4ba7e07e0e597167a226a483d7 Mon Sep 17 00:00:00 2001 From: MihailK Date: Mon, 17 Nov 2025 22:51:01 +0500 Subject: [PATCH 10/13] =?UTF-8?q?=D0=94=D0=BE=D0=B1=D0=B0=D0=B2=D0=B8?= =?UTF-8?q?=D0=BB=20=D0=B5=D1=89=D1=91=20=D1=82=D0=B5=D1=81=D1=82=D0=BE?= =?UTF-8?q?=D0=B2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/MarkdownTest/RenderTest.cs | 16 ++++++++++------ cs/clean-code.sln | 18 ------------------ 2 files changed, 10 insertions(+), 24 deletions(-) diff --git a/cs/Markdown/MarkdownTest/RenderTest.cs b/cs/Markdown/MarkdownTest/RenderTest.cs index 5df47ee2c..6d80ef3ab 100644 --- a/cs/Markdown/MarkdownTest/RenderTest.cs +++ b/cs/Markdown/MarkdownTest/RenderTest.cs @@ -12,7 +12,7 @@ public class RenderTest [TestCase("_курсив_", "курсив", TestName = "Просто курсив")] [TestCase("__полужирный__", "полужирный", TestName = "Просто полужирный")] [TestCase("_это просто текст_", "это просто текст", TestName = "Просто текст с курсивом")] - [TestCase("__a _b_ c__", "a b c", TestName = "Курсив внутри жирного должен работать")] + [TestCase("__a _b_ c__", "a b c", TestName = "Курсив внутри полужирного должен работать")] public void Проверка_ПравильногоВыделения(string text, string expected) { var html = md.Render(text); @@ -21,9 +21,12 @@ public void Проверка_ПравильногоВыделения(string tex [TestCase("__неправильное выделение_", "__неправильное выделение_", TestName = "Разные символы выделения")] [TestCase("_неправильное выделение__", "_неправильное выделение__", TestName = "Разные символы выделения")] - [TestCase("_a __b b__ c_", "a __b b__ c", TestName = "Жирный внутри курсива не должен работать")] + [TestCase("_a __b b__ c_", "a __b b__ c", TestName = "Внутри одинарного двойное не работает.")] [TestCase("_ a_ bbb _a _", "_ a_ bbb _a _", TestName = "Неправильное прилипание не выделяет")] - [TestCase("__пересечения _двойных__ и одинарных_", "__пересечения _двойных__ и одинарных_", TestName = "Пересечение разных выделений")] + [TestCase("__пересечение _двойных__ и одинарных_", "__пересечение _двойных__ и одинарных_", + TestName = "Пересечение разных выделений")] + [TestCase("_пересечение __одинарных_ и двойных__", "_пересечение __одинарных_ и двойных__", + TestName = "Пересечение разных выделений")] public void Проверка_НеправильногоВыделения(string text, string expected) { var html = md.Render(text); @@ -75,9 +78,10 @@ public void НеверныйЗаголовок(string text, string expected) } - [TestCase("\\_это не работает_", "_это не работает_", TestName = "1111")] - [TestCase("\\\\_это работает_", "\\это работает", - TestName = "111")] + [TestCase(@"\_это не работает_", "_это не работает_", TestName = "Экранирование курсива")] + [TestCase(@"\# Заголовок", "# Заголовок", TestName = "Экранирование заголовка")] + [TestCase(@"\\_это работает_", @"\это работает", TestName = "Экранирование экранирования")] + [TestCase(@"_это не\_ работает_", "это не_ работает", TestName = "Экранирование внутри")] public void Экранирование(string text, string expected) { var html = md.Render(text); diff --git a/cs/clean-code.sln b/cs/clean-code.sln index 93a958059..ac7a9b0f1 100644 --- a/cs/clean-code.sln +++ b/cs/clean-code.sln @@ -3,12 +3,6 @@ Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio 14 VisualStudioVersion = 14.0.25420.1 MinimumVisualStudioVersion = 10.0.40219.1 -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Chess", "Chess\Chess.csproj", "{DBFBE40E-EE0C-48F4-8763-EBD11C960081}" -EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ControlDigit", "ControlDigit\ControlDigit.csproj", "{B06A4B35-9D61-4A63-9167-0673F20CA989}" -EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Samples", "Samples\Samples.csproj", "{C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}" -EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Markdown", "Markdown\Markdown.csproj", "{3FAFF8B6-E113-410C-AB67-1A9614EE6BB4}" EndProject Global @@ -17,18 +11,6 @@ Global Release|Any CPU = Release|Any CPU EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution - {DBFBE40E-EE0C-48F4-8763-EBD11C960081}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {DBFBE40E-EE0C-48F4-8763-EBD11C960081}.Debug|Any CPU.Build.0 = Debug|Any CPU - {DBFBE40E-EE0C-48F4-8763-EBD11C960081}.Release|Any CPU.ActiveCfg = Release|Any CPU - {DBFBE40E-EE0C-48F4-8763-EBD11C960081}.Release|Any CPU.Build.0 = Release|Any CPU - {B06A4B35-9D61-4A63-9167-0673F20CA989}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {B06A4B35-9D61-4A63-9167-0673F20CA989}.Debug|Any CPU.Build.0 = Debug|Any CPU - {B06A4B35-9D61-4A63-9167-0673F20CA989}.Release|Any CPU.ActiveCfg = Release|Any CPU - {B06A4B35-9D61-4A63-9167-0673F20CA989}.Release|Any CPU.Build.0 = Release|Any CPU - {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Debug|Any CPU.Build.0 = Debug|Any CPU - {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.ActiveCfg = Release|Any CPU - {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.Build.0 = Release|Any CPU {3FAFF8B6-E113-410C-AB67-1A9614EE6BB4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {3FAFF8B6-E113-410C-AB67-1A9614EE6BB4}.Debug|Any CPU.Build.0 = Debug|Any CPU {3FAFF8B6-E113-410C-AB67-1A9614EE6BB4}.Release|Any CPU.ActiveCfg = Release|Any CPU From b3e1dd9a8dbe6762d783245c17a82a48aa85acfb Mon Sep 17 00:00:00 2001 From: MihailK Date: Fri, 21 Nov 2025 20:12:52 +0500 Subject: [PATCH 11/13] =?UTF-8?q?=D0=94=D0=BE=D0=B1=D0=B0=D0=B2=D0=B8?= =?UTF-8?q?=D0=BB=20=D0=BE=D0=B1=D1=80=D0=B0=D0=B1=D0=BE=D1=82=D0=BA=D1=83?= =?UTF-8?q?=20=D1=81=D1=81=D1=8B=D0=BB=D0=BA=D0=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/MarkdownTest/RenderTest.cs | 20 ++++ cs/Markdown/Node/HandelProcessor.cs | 7 ++ cs/Markdown/Node/NodeGenerator.cs | 4 + cs/Markdown/Rendering.cs | 18 +-- cs/Markdown/Token/TextCursor.cs | 9 ++ .../{ => Token}/TokenCreateRules/DigitRule.cs | 0 .../TokenCreateRules/EscapeRule.cs | 0 .../TokenCreateRules/HeaderRule.cs | 0 .../TokenCreateRules/ITokenRule.cs | 0 .../Token/TokenCreateRules/LinkRule.cs | 105 ++++++++++++++++++ .../TokenCreateRules/NewLineRule.cs | 0 .../TokenCreateRules/TextRunRule.cs | 0 .../TokenCreateRules/UnderscoreRule.cs | 0 .../TokenCreateRules/WhiteSpaceRule.cs | 0 cs/Markdown/Token/TokenGenerator.cs | 2 +- cs/Markdown/Token/TokenType.cs | 3 +- 16 files changed, 150 insertions(+), 18 deletions(-) rename cs/Markdown/{ => Token}/TokenCreateRules/DigitRule.cs (100%) rename cs/Markdown/{ => Token}/TokenCreateRules/EscapeRule.cs (100%) rename cs/Markdown/{ => Token}/TokenCreateRules/HeaderRule.cs (100%) rename cs/Markdown/{ => Token}/TokenCreateRules/ITokenRule.cs (100%) create mode 100644 cs/Markdown/Token/TokenCreateRules/LinkRule.cs rename cs/Markdown/{ => Token}/TokenCreateRules/NewLineRule.cs (100%) rename cs/Markdown/{ => Token}/TokenCreateRules/TextRunRule.cs (100%) rename cs/Markdown/{ => Token}/TokenCreateRules/UnderscoreRule.cs (100%) rename cs/Markdown/{ => Token}/TokenCreateRules/WhiteSpaceRule.cs (100%) diff --git a/cs/Markdown/MarkdownTest/RenderTest.cs b/cs/Markdown/MarkdownTest/RenderTest.cs index 6d80ef3ab..494a90477 100644 --- a/cs/Markdown/MarkdownTest/RenderTest.cs +++ b/cs/Markdown/MarkdownTest/RenderTest.cs @@ -49,6 +49,13 @@ public void Выделение_внутри_слова(string text, string expec var html = md.Render(text); html.Should().Be(expected); } + + [TestCase("_____", "_____", TestName = "Подряд идущие подчёркивания")] + public void ПростоПодчёркивания(string text, string expected) + { + var html = md.Render(text); + html.Should().Be(expected); + } [TestCase("# Заголовок", "

Заголовок

", TestName = "Заголовок")] @@ -87,4 +94,17 @@ public void Экранирование(string text, string expected) var html = md.Render(text); html.Should().Be(expected); } + + [TestCase("Ссылка: https://ulearn.me", "Ссылка: ", TestName = "https распознаётся, как ссылка")] + [TestCase("Ссылка: www.example.org", "Ссылка: ", TestName = "www распознаётся, как ссылка")] + [TestCase("Ссылка: example.com", "Ссылка: ", TestName = "example.com распознаётся_как_ссылка")] + [TestCase("Несколько ссылок: https://a.ru и http://b.com", + "Несколько ссылок: и ", + TestName = "Несколько ссылок распознаются")] + [TestCase("Это не ссылка: .com", "Это не ссылка: .com", TestName = "Строка начинающаяся с точки com не считается ссылкой")] + public void ОбработкаСсылки(string text, string expected) + { + var html = md.Render(text); + html.Should().Be(expected); + } } \ No newline at end of file diff --git a/cs/Markdown/Node/HandelProcessor.cs b/cs/Markdown/Node/HandelProcessor.cs index 96e682073..9ca791a9d 100644 --- a/cs/Markdown/Node/HandelProcessor.cs +++ b/cs/Markdown/Node/HandelProcessor.cs @@ -35,6 +35,13 @@ public static void Score(List children, TokenCursor tokenCursor) tokenCursor.Move(); return; } + + if (nextToken.Type is TokenType.DoubleUnderscore or TokenType.Underscore) + { + children.Add(NodeFactory.Create(NodeType.Text, currentToken.Value, null)); + tokenCursor.Move(); + return; + } var currentTokenType = currentToken.Type; var currentTokenPos = tokenCursor.Position; diff --git a/cs/Markdown/Node/NodeGenerator.cs b/cs/Markdown/Node/NodeGenerator.cs index c6c62e898..8c032dc0b 100644 --- a/cs/Markdown/Node/NodeGenerator.cs +++ b/cs/Markdown/Node/NodeGenerator.cs @@ -44,6 +44,10 @@ private static List GetChildrenNods(TokenCursor tokenCursor) case TokenType.Escape: HandelProcessor.Escape(children, tokenCursor); break; + case TokenType.Link: + children.Add(NodeFactory.Create(NodeType.Link, tokenCursor.Current.Value, null)); + tokenCursor.Move(); + break; case TokenType.NewLine: case TokenType.EndOfText: case TokenType.Text: diff --git a/cs/Markdown/Rendering.cs b/cs/Markdown/Rendering.cs index 5d7d5d7fb..dbf35fd9e 100644 --- a/cs/Markdown/Rendering.cs +++ b/cs/Markdown/Rendering.cs @@ -18,8 +18,7 @@ public string Render(IEnumerable nodes) if (nodes == null) return string.Empty; var sb = new StringBuilder(); - - // Идём последовательно по узлам верхнего уровня + foreach (var node in nodes) { RenderNode(node, sb); @@ -35,8 +34,6 @@ private static void RenderNode(Node node, StringBuilder sb) switch (node.Type) { case NodeType.Header: - // В соответствии с условием: абзац, начинающийся с "# ", ->

...

- // Дети содержат инлайны (Plain/Emphasis/Strong/Link) sb.Append("

"); foreach (var child in node.Children) RenderNode(child, sb); @@ -64,24 +61,13 @@ private static void RenderNode(Node node, StringBuilder sb) break; case NodeType.Link: - // В данной постановке правила ссылок не описаны подробно. - // Предположим семантику: Value = href, Children = текст ссылки. - // Если Children пусты — рендерим сам href как текст. - var href = WebUtility.HtmlEncode(node.Value ?? string.Empty); - sb.Append(""); - if (node.Children != null && node.Children.Count > 0) - RenderInlineChildren(node.Children, sb); - else - sb.Append(href); - sb.Append(""); + sb.Append(""); break; case NodeType.Text: default: - // Текстовые узлы: экранируем var text = WebUtility.HtmlEncode(node.Value ?? string.Empty); sb.Append(text); - // Если у Plain есть дети (на всякий случай) — тоже обойти if (node.Children != null && node.Children.Count > 0) RenderInlineChildren(node.Children, sb); break; diff --git a/cs/Markdown/Token/TextCursor.cs b/cs/Markdown/Token/TextCursor.cs index 304a179fa..d2a1392ae 100644 --- a/cs/Markdown/Token/TextCursor.cs +++ b/cs/Markdown/Token/TextCursor.cs @@ -35,4 +35,13 @@ public void Move(int count = 1) => public string Slice(int start, int end) => input.Substring(start, Math.Max(0, end - start)); + + public bool Matches(string text) + { + if (text == null || Position + text.Length > input.Length) + return false; + + return !text.Where((t, i) => input[Position + i] != t).Any(); + } + } \ No newline at end of file diff --git a/cs/Markdown/TokenCreateRules/DigitRule.cs b/cs/Markdown/Token/TokenCreateRules/DigitRule.cs similarity index 100% rename from cs/Markdown/TokenCreateRules/DigitRule.cs rename to cs/Markdown/Token/TokenCreateRules/DigitRule.cs diff --git a/cs/Markdown/TokenCreateRules/EscapeRule.cs b/cs/Markdown/Token/TokenCreateRules/EscapeRule.cs similarity index 100% rename from cs/Markdown/TokenCreateRules/EscapeRule.cs rename to cs/Markdown/Token/TokenCreateRules/EscapeRule.cs diff --git a/cs/Markdown/TokenCreateRules/HeaderRule.cs b/cs/Markdown/Token/TokenCreateRules/HeaderRule.cs similarity index 100% rename from cs/Markdown/TokenCreateRules/HeaderRule.cs rename to cs/Markdown/Token/TokenCreateRules/HeaderRule.cs diff --git a/cs/Markdown/TokenCreateRules/ITokenRule.cs b/cs/Markdown/Token/TokenCreateRules/ITokenRule.cs similarity index 100% rename from cs/Markdown/TokenCreateRules/ITokenRule.cs rename to cs/Markdown/Token/TokenCreateRules/ITokenRule.cs diff --git a/cs/Markdown/Token/TokenCreateRules/LinkRule.cs b/cs/Markdown/Token/TokenCreateRules/LinkRule.cs new file mode 100644 index 000000000..0077f8344 --- /dev/null +++ b/cs/Markdown/Token/TokenCreateRules/LinkRule.cs @@ -0,0 +1,105 @@ +namespace Markdown; + +public class LinkRule : ITokenRule +{ + private static readonly string[] ValidLinkPrefixes = + [ + "http://", + "https://", + "www." + ]; + + public Token? TryReadTokenAndMoveCursor(TextCursor cursor) + { + if (cursor.End) + return null; + + var start = cursor.Position; + + if (!IsLinkStart(cursor)) + { + cursor.Revert(start); + return null; + } + + while (!cursor.End && IsLinkChar(cursor.Current)) + cursor.Move(); + + var end = cursor.Position; + + var value = cursor.Slice(start, end); + + if (!IsValidLinkStart(value)) + { + cursor.Revert(start); + return null; + } + + return TokenFactory.Create(TokenType.Link, value, start); + } + + private static bool IsLinkStart(TextCursor cursor) + { + var mark = cursor.Position; + + if (ValidLinkPrefixes.Any(cursor.Matches)) + { + cursor.Revert(mark); + return true; + } + + if (char.IsLetterOrDigit(cursor.Current)) + { + var pos = cursor.Position; + var hasDot = false; + + while (pos < cursor.Length && !char.IsWhiteSpace(cursor.Peek(pos - cursor.Position))) + { + if (cursor.Peek(pos - cursor.Position) == '.') + { + hasDot = true; + break; + } + pos++; + } + + cursor.Revert(mark); + if (hasDot) + return true; + } + else + { + cursor.Revert(mark); + } + + return false; + } + + private static bool IsLinkChar(char c) + { + if (char.IsWhiteSpace(c)) + return false; + + switch (c) + { + case ')': + case ']': + case '<': + case '>': + case '\"': + return false; + } + + return true; + } + + private static bool IsValidLinkStart(string value) + { + if (ValidLinkPrefixes.Any(prefix => string.Equals(value, prefix, StringComparison.OrdinalIgnoreCase))) + return false; + + var dotIndex = value.IndexOf('.'); + return dotIndex > 0 && dotIndex != value.Length - 1; + } + +} diff --git a/cs/Markdown/TokenCreateRules/NewLineRule.cs b/cs/Markdown/Token/TokenCreateRules/NewLineRule.cs similarity index 100% rename from cs/Markdown/TokenCreateRules/NewLineRule.cs rename to cs/Markdown/Token/TokenCreateRules/NewLineRule.cs diff --git a/cs/Markdown/TokenCreateRules/TextRunRule.cs b/cs/Markdown/Token/TokenCreateRules/TextRunRule.cs similarity index 100% rename from cs/Markdown/TokenCreateRules/TextRunRule.cs rename to cs/Markdown/Token/TokenCreateRules/TextRunRule.cs diff --git a/cs/Markdown/TokenCreateRules/UnderscoreRule.cs b/cs/Markdown/Token/TokenCreateRules/UnderscoreRule.cs similarity index 100% rename from cs/Markdown/TokenCreateRules/UnderscoreRule.cs rename to cs/Markdown/Token/TokenCreateRules/UnderscoreRule.cs diff --git a/cs/Markdown/TokenCreateRules/WhiteSpaceRule.cs b/cs/Markdown/Token/TokenCreateRules/WhiteSpaceRule.cs similarity index 100% rename from cs/Markdown/TokenCreateRules/WhiteSpaceRule.cs rename to cs/Markdown/Token/TokenCreateRules/WhiteSpaceRule.cs diff --git a/cs/Markdown/Token/TokenGenerator.cs b/cs/Markdown/Token/TokenGenerator.cs index 3a7268b98..2634457b1 100644 --- a/cs/Markdown/Token/TokenGenerator.cs +++ b/cs/Markdown/Token/TokenGenerator.cs @@ -6,6 +6,7 @@ public class TokenGenerator : ITokenGenerator { private readonly List rules = new() { + new LinkRule(), new NewLineRule(), new WhiteSpaceRule(), new EscapeRule(), @@ -20,7 +21,6 @@ public List Tokenize(string text) if (string.IsNullOrEmpty(text)) return null; var tokens = new List(); - //todo: подумать про строку из пробелов - IsNullOfWhitespace var cursor = new TextCursor(text); while (!cursor.End) diff --git a/cs/Markdown/Token/TokenType.cs b/cs/Markdown/Token/TokenType.cs index be9596009..903069009 100644 --- a/cs/Markdown/Token/TokenType.cs +++ b/cs/Markdown/Token/TokenType.cs @@ -9,5 +9,6 @@ public enum TokenType NewLine, Escape, EndOfText, - WhiteSpace + WhiteSpace, + Link } \ No newline at end of file From 16d878cf2aa0a26e2af43fcb766f0082961fbb0e Mon Sep 17 00:00:00 2001 From: MihailK Date: Fri, 21 Nov 2025 21:05:05 +0500 Subject: [PATCH 12/13] =?UTF-8?q?=D0=98=D0=B7=D0=BC=D0=B5=D0=BD=D0=B8?= =?UTF-8?q?=D0=BB=20=D0=BE=D0=B1=D1=80=D0=B0=D0=B1=D0=BE=D1=82=D0=BA=D1=83?= =?UTF-8?q?=20=D0=BF=D0=BE=D0=B4=D1=87=D1=91=D1=80=D0=BA=D0=B8=D0=B2=D0=B0?= =?UTF-8?q?=D0=BD=D0=B8=D1=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/MarkdownTest/RenderTest.cs | 22 +-- cs/Markdown/Node/HandelProcessor.cs | 185 +----------------- .../Node/ScoreRule/BetweenDigitsScoreRule.cs | 18 ++ cs/Markdown/Node/ScoreRule/IScoreRule.cs | 6 + .../Node/ScoreRule/NextIsScoreScoreRule.cs | 19 ++ .../NextIsWhitespaceOrEndScoreRule.cs | 17 ++ cs/Markdown/Node/ScoreRule/ScoreContext.cs | 26 +++ cs/Markdown/Node/ScoreRule/ScoreRule.cs | 157 +++++++++++++++ cs/Markdown/Node/ScoreRule/ScoreUtils.cs | 17 ++ cs/Markdown/Rendering.cs | 18 +- 10 files changed, 287 insertions(+), 198 deletions(-) create mode 100644 cs/Markdown/Node/ScoreRule/BetweenDigitsScoreRule.cs create mode 100644 cs/Markdown/Node/ScoreRule/IScoreRule.cs create mode 100644 cs/Markdown/Node/ScoreRule/NextIsScoreScoreRule.cs create mode 100644 cs/Markdown/Node/ScoreRule/NextIsWhitespaceOrEndScoreRule.cs create mode 100644 cs/Markdown/Node/ScoreRule/ScoreContext.cs create mode 100644 cs/Markdown/Node/ScoreRule/ScoreRule.cs create mode 100644 cs/Markdown/Node/ScoreRule/ScoreUtils.cs diff --git a/cs/Markdown/MarkdownTest/RenderTest.cs b/cs/Markdown/MarkdownTest/RenderTest.cs index 494a90477..2ea983f86 100644 --- a/cs/Markdown/MarkdownTest/RenderTest.cs +++ b/cs/Markdown/MarkdownTest/RenderTest.cs @@ -6,14 +6,14 @@ namespace MarkdownTest; public class RenderTest { - private readonly Md md = new Md(); + private readonly Md md = new(); [TestCase("_курсив_", "курсив", TestName = "Просто курсив")] [TestCase("__полужирный__", "полужирный", TestName = "Просто полужирный")] [TestCase("_это просто текст_", "это просто текст", TestName = "Просто текст с курсивом")] [TestCase("__a _b_ c__", "a b c", TestName = "Курсив внутри полужирного должен работать")] - public void Проверка_ПравильногоВыделения(string text, string expected) + public void Render_ShouldApplySimpleEmphasisAndStrong_WhenBasicMarkdownUsed(string text, string expected) { var html = md.Render(text); html.Should().Be(expected); @@ -27,7 +27,7 @@ public void Проверка_ПравильногоВыделения(string tex TestName = "Пересечение разных выделений")] [TestCase("_пересечение __одинарных_ и двойных__", "_пересечение __одинарных_ и двойных__", TestName = "Пересечение разных выделений")] - public void Проверка_НеправильногоВыделения(string text, string expected) + public void Render_ShouldSkipIncorrectOrIntersectedTags_WhenWrongMarkdownUsed(string text, string expected) { var html = md.Render(text); html.Should().Be(expected); @@ -44,14 +44,14 @@ public void Проверка_НеправильногоВыделения(string [TestCase("в ра_зных сл_овах", "в ра_зных сл_овах", TestName = "Выделение в разных словах не работает")] [TestCase("эти_ подчерки_ не считаются выделением", "эти_ подчерки_ не считаются выделением", TestName = "Пробельный символ после подчерка")] - public void Выделение_внутри_слова(string text, string expected) + public void Render_ShouldHandleEmphasisInsideWordsCorrectly_WhenInsideWordsAndBordersPresent(string text, string expected) { var html = md.Render(text); html.Should().Be(expected); } [TestCase("_____", "_____", TestName = "Подряд идущие подчёркивания")] - public void ПростоПодчёркивания(string text, string expected) + public void Render_ShouldOutputPlainUnderscores_WhenContinuousUnderscoresGiven(string text, string expected) { var html = md.Render(text); html.Should().Be(expected); @@ -59,8 +59,8 @@ public void ПростоПодчёркивания(string text, string expected) [TestCase("# Заголовок", "

Заголовок

", TestName = "Заголовок")] - [TestCase("# Заголовок\n_курсив_ и __жирный__ и вну_три", - "

Заголовок

\nкурсив и жирный и вну_три", + [TestCase("# Заголовок\n_курсив_ и __жирный__", + "

Заголовок

\nкурсив и жирный", TestName = "Сохраняется_перенос_после_заголовка_и_инлайн_на_следующей_строке")] [TestCase("# Заголовок1\n_курсив_ и __жирный__ и вну_три\n# Заголовок2\nкапибара", "

Заголовок1

\nкурсив и жирный и вну_три\n

Заголовок2

\nкапибара", @@ -69,7 +69,7 @@ public void ПростоПодчёркивания(string text, string expected) "

Заголовок

\nпросто текст", TestName = "Заголовок и перенос строки")] [TestCase("# Это заголовок c # внутри", "

Это заголовок c # внутри

", TestName = "Решётка внутри заголовка")] - public void ВерныйЗаголовок(string text, string expected) + public void Render_ShouldProduceHeadersCorrectly_WhenValidHeaderMarkdownProvided(string text, string expected) { var html = md.Render(text); html.Should().Be(expected); @@ -78,7 +78,7 @@ public void ВерныйЗаголовок(string text, string expected) [TestCase("#Это не заголовок", "#Это не заголовок", TestName = "Нет пробела после #")] [TestCase("Это # не заголовок", "Это # не заголовок", TestName = "# внутри текста не заголовок")] [TestCase("Это не заголовок #", "Это не заголовок #", TestName = "# после текста не заголовок")] - public void НеверныйЗаголовок(string text, string expected) + public void Render_ShouldNotProduceHeaders_WhenHeaderMarkdownIsInvalid(string text, string expected) { var html = md.Render(text); html.Should().Be(expected); @@ -89,7 +89,7 @@ public void НеверныйЗаголовок(string text, string expected) [TestCase(@"\# Заголовок", "# Заголовок", TestName = "Экранирование заголовка")] [TestCase(@"\\_это работает_", @"\это работает", TestName = "Экранирование экранирования")] [TestCase(@"_это не\_ работает_", "это не_ работает", TestName = "Экранирование внутри")] - public void Экранирование(string text, string expected) + public void Render_ShouldEscapeSpecialMarkdownCharacters_WhenEscapesPresent(string text, string expected) { var html = md.Render(text); html.Should().Be(expected); @@ -102,7 +102,7 @@ public void Экранирование(string text, string expected) "Несколько ссылок:
и ", TestName = "Несколько ссылок распознаются")] [TestCase("Это не ссылка: .com", "Это не ссылка: .com", TestName = "Строка начинающаяся с точки com не считается ссылкой")] - public void ОбработкаСсылки(string text, string expected) + public void Render_ShouldRecognizeAndLinkifyUrls_WhenLinksArePresent(string text, string expected) { var html = md.Render(text); html.Should().Be(expected); diff --git a/cs/Markdown/Node/HandelProcessor.cs b/cs/Markdown/Node/HandelProcessor.cs index 9ca791a9d..9cb3d933f 100644 --- a/cs/Markdown/Node/HandelProcessor.cs +++ b/cs/Markdown/Node/HandelProcessor.cs @@ -2,6 +2,13 @@ public static class HandelProcessor { + private static readonly IScoreRule[] ScoreRules = + [ + new BetweenDigitsScoreRule(), + new NextIsWhitespaceOrEndScoreRule(), + new NextIsScoreScoreRule(), + new DefaultScoreRule() + ]; public static void Escape(List children, TokenCursor tokenCursor) { tokenCursor.Move(); @@ -23,183 +30,9 @@ public static void Escape(List children, TokenCursor tokenCursor) tokenCursor.Move(); } - public static void Score(List children, TokenCursor tokenCursor) - { - var currentToken = tokenCursor.Current; - var previousToken = tokenCursor.tokens.ElementAtOrDefault(tokenCursor.Position - 1); - var nextToken = tokenCursor.tokens.ElementAtOrDefault(tokenCursor.Position + 1); - - if (TokenBetweenDigits(previousToken, nextToken) || ItWhiteSpaceOrEndToken(nextToken)) - { - children.Add(NodeFactory.Create(NodeType.Text, currentToken.Value, null)); - tokenCursor.Move(); - return; - } - - if (nextToken.Type is TokenType.DoubleUnderscore or TokenType.Underscore) - { - children.Add(NodeFactory.Create(NodeType.Text, currentToken.Value, null)); - tokenCursor.Move(); - return; - } - - var currentTokenType = currentToken.Type; - var currentTokenPos = tokenCursor.Position; - var currentNodeType = currentTokenType == TokenType.DoubleUnderscore ? NodeType.Strong : NodeType.Emphasis; - - var innerScore = new List<(int Index, TokenType Type)>(); - var closeIndex = -1; - - for (var i = currentTokenPos + 1; i < tokenCursor.TokenCount; i++) - { - var t = tokenCursor.tokens[i]; - - - if (t.Type is TokenType.NewLine or TokenType.EndOfText) - break; - - if (t.Type == currentTokenType) - { - var left = tokenCursor.tokens.ElementAtOrDefault(i - 1); - var right = tokenCursor.tokens.ElementAtOrDefault(i + 1); - if (left?.Type != TokenType.WhiteSpace && - right?.Type is TokenType.WhiteSpace or TokenType.EndOfText) - { - closeIndex = i; - break; - } - } - - if (t.Type == TokenType.Underscore || t.Type == TokenType.DoubleUnderscore) - innerScore.Add((i, t.Type)); - } - - if (closeIndex < 0) - { - for (var i = currentTokenPos + 1; i < tokenCursor.TokenCount; i++) - { - var token = tokenCursor.tokens[i]; - if (token.Type == TokenType.WhiteSpace) - { - children.Add(NodeFactory.Create(NodeType.Text, currentToken.Value, null)); - tokenCursor.Move(); - return; - } - - if (token.Type == currentTokenType) - { - closeIndex = i; - break; - } - } - - if (closeIndex < 0) - { - children.Add(NodeFactory.Create(NodeType.Text, currentToken.Value, null)); - tokenCursor.Move(); - return; - } - - var nodes = GetChildren(tokenCursor, currentTokenPos + 1, closeIndex); - children.Add(NodeFactory.Create(currentNodeType, null, nodes)); - tokenCursor.Move(closeIndex - currentTokenPos + 1); - return; - } - - var start = currentTokenPos + 1; - var end = closeIndex; - - if (HasCrossingPairs(closeIndex, innerScore, tokenCursor)) - { - children.Add(NodeFactory.Create(NodeType.Text, currentToken.Value, null)); - children.Add(NodeFactory.Create(NodeType.Text, null, GetChildren(tokenCursor, start, end))); - children.Add(NodeFactory.Create(NodeType.Text, currentToken.Value, null)); - tokenCursor.Move(closeIndex - currentTokenPos + 1); - return; - } - - switch (currentTokenType) - { - case TokenType.Underscore: - { - var hasDoubleInside = innerScore.Any(u => - u.Type == TokenType.DoubleUnderscore && u.Index > start && u.Index < end); - if (hasDoubleInside) - { - var node = NodeFactory.Create(NodeType.Emphasis, null, GetChildren(tokenCursor, start, end)); - children.Add(node); - } - else - { - var node = NodeFactory.Create(NodeType.Emphasis, null, GetChildren(tokenCursor, start, end)); - children.Add(node); - } - - tokenCursor.Move(closeIndex - currentTokenPos + 1); - return; - } - case TokenType.DoubleUnderscore: - { - var childTokens = tokenCursor.tokens.Slice(currentTokenPos + 1, closeIndex - currentTokenPos); - var childNode = new NodeGenerator().Create(childTokens); - children.Add(NodeFactory.Create(currentNodeType, null, childNode)); - tokenCursor.Move(end - currentTokenPos + 1); - break; - } - } - } - private static bool HasCrossingPairs(int closeIndex, - List<(int Index, TokenType Type)> innerUnderscores, - TokenCursor cursor) - { - var scoreType = cursor.Current.Type == TokenType.Underscore? TokenType.DoubleUnderscore : TokenType.Underscore; - foreach (var u in innerUnderscores.Where(x => x.Type == scoreType)) - { - for (var j = u.Index + 1; j < cursor.TokenCount; j++) - { - var t = cursor.tokens[j]; - if (t.Type is TokenType.NewLine or TokenType.EndOfText) - break; - if (t.Type == scoreType) - { - var left = cursor.tokens.ElementAtOrDefault(j - 1); - if (left?.Type != TokenType.WhiteSpace) - { - if (j > closeIndex) - return true; - break; - } - } - } - } - - return false; - } - - private static List GetChildren(TokenCursor cursor, int start, int end) - { - var slice = new List(); - for (int i = start; i < end; i++) - { - var t = cursor.tokens[i]; - slice.Add(NodeFactory.Create(NodeType.Text, t.Value, null)); - } - - return slice; - } - - private static bool ItWhiteSpaceOrEndToken(Token? token) - { - return token is { Type: TokenType.WhiteSpace } or { Type: TokenType.EndOfText }; - } - - private static bool TokenBetweenDigits(Token? previousToken, Token? nextToken) + public static void Score(List children, TokenCursor tokenCursor) { - var leftTokenIsDigit = previousToken is { Type: TokenType.Text, Value.Length: > 0 } && - char.IsDigit(previousToken.Value.Last()); - var rightTokenIsDigit = nextToken is { Type: TokenType.Text, Value.Length: > 0 } && - char.IsDigit(nextToken.Value.First()); - return leftTokenIsDigit && rightTokenIsDigit; + if (ScoreRules.Any(rule => rule.TryApply(children, tokenCursor))); } } \ No newline at end of file diff --git a/cs/Markdown/Node/ScoreRule/BetweenDigitsScoreRule.cs b/cs/Markdown/Node/ScoreRule/BetweenDigitsScoreRule.cs new file mode 100644 index 000000000..25d6ac067 --- /dev/null +++ b/cs/Markdown/Node/ScoreRule/BetweenDigitsScoreRule.cs @@ -0,0 +1,18 @@ +namespace Markdown; + +public sealed class BetweenDigitsScoreRule : IScoreRule +{ + public bool TryApply(List children, TokenCursor tokenCursor) + { + var current = tokenCursor.Current; + var prev = tokenCursor.tokens.ElementAtOrDefault(tokenCursor.Position - 1); + var next = tokenCursor.tokens.ElementAtOrDefault(tokenCursor.Position + 1); + + if (!ScoreUtils.TokenBetweenDigits(prev, next)) + return false; + + children.Add(NodeFactory.Create(NodeType.Text, current.Value, null)); + tokenCursor.Move(); + return true; + } +} \ No newline at end of file diff --git a/cs/Markdown/Node/ScoreRule/IScoreRule.cs b/cs/Markdown/Node/ScoreRule/IScoreRule.cs new file mode 100644 index 000000000..2209059aa --- /dev/null +++ b/cs/Markdown/Node/ScoreRule/IScoreRule.cs @@ -0,0 +1,6 @@ +namespace Markdown; + +public interface IScoreRule +{ + bool TryApply(List children, TokenCursor tokenCursor); +} \ No newline at end of file diff --git a/cs/Markdown/Node/ScoreRule/NextIsScoreScoreRule.cs b/cs/Markdown/Node/ScoreRule/NextIsScoreScoreRule.cs new file mode 100644 index 000000000..4ac56d923 --- /dev/null +++ b/cs/Markdown/Node/ScoreRule/NextIsScoreScoreRule.cs @@ -0,0 +1,19 @@ +namespace Markdown; + +public sealed class NextIsScoreScoreRule : IScoreRule +{ + public bool TryApply(List children, TokenCursor tokenCursor) + { + var current = tokenCursor.Current; + var next = tokenCursor.tokens.ElementAtOrDefault(tokenCursor.Position + 1); + if (next is null) + return false; + + if (next.Type is not (TokenType.Underscore or TokenType.DoubleUnderscore)) + return false; + + children.Add(NodeFactory.Create(NodeType.Text, current.Value, null)); + tokenCursor.Move(); + return true; + } +} \ No newline at end of file diff --git a/cs/Markdown/Node/ScoreRule/NextIsWhitespaceOrEndScoreRule.cs b/cs/Markdown/Node/ScoreRule/NextIsWhitespaceOrEndScoreRule.cs new file mode 100644 index 000000000..6c28a6846 --- /dev/null +++ b/cs/Markdown/Node/ScoreRule/NextIsWhitespaceOrEndScoreRule.cs @@ -0,0 +1,17 @@ +namespace Markdown; + +public sealed class NextIsWhitespaceOrEndScoreRule : IScoreRule +{ + public bool TryApply(List children, TokenCursor tokenCursor) + { + var current = tokenCursor.Current; + var next = tokenCursor.tokens.ElementAtOrDefault(tokenCursor.Position + 1); + + if (!ScoreUtils.IsWhiteSpaceOrEndToken(next)) + return false; + + children.Add(NodeFactory.Create(NodeType.Text, current.Value, null)); + tokenCursor.Move(); + return true; + } +} \ No newline at end of file diff --git a/cs/Markdown/Node/ScoreRule/ScoreContext.cs b/cs/Markdown/Node/ScoreRule/ScoreContext.cs new file mode 100644 index 000000000..d6bc43b12 --- /dev/null +++ b/cs/Markdown/Node/ScoreRule/ScoreContext.cs @@ -0,0 +1,26 @@ +namespace Markdown; + +public sealed class ScoreContext +{ + public List Children { get; } + public TokenCursor Cursor { get; } + public Token CurrentToken { get; } + public TokenType CurrentTokenType { get; } + public int CurrentTokenPos { get; } + public NodeType CurrentNodeType { get; } + + public List<(int Index, TokenType Type)> InnerScore { get; } = new(); + public int CloseIndex { get; set; } = -1; + + public ScoreContext(List children, TokenCursor cursor) + { + Children = children; + Cursor = cursor; + CurrentToken = cursor.Current; + CurrentTokenType = CurrentToken.Type; + CurrentTokenPos = cursor.Position; + CurrentNodeType = CurrentTokenType == TokenType.DoubleUnderscore + ? NodeType.Strong + : NodeType.Emphasis; + } +} \ No newline at end of file diff --git a/cs/Markdown/Node/ScoreRule/ScoreRule.cs b/cs/Markdown/Node/ScoreRule/ScoreRule.cs new file mode 100644 index 000000000..843399385 --- /dev/null +++ b/cs/Markdown/Node/ScoreRule/ScoreRule.cs @@ -0,0 +1,157 @@ +using Markdown; + +public sealed class DefaultScoreRule : IScoreRule +{ + public bool TryApply(List children, TokenCursor tokenCursor) + { + var ctx = new ScoreContext(children, tokenCursor); + + ctx.InnerScore.Clear(); + ctx.CloseIndex = FindCloseIndex(ctx); + + return ctx.CloseIndex < 0 + ? TryHandleWithoutExplicitCloser(ctx) + : HandleWithExplicitCloser(ctx); + } + + private static bool TryHandleWithoutExplicitCloser(ScoreContext ctx) + { + ctx.CloseIndex = FindFallbackCloser(ctx); + if (ctx.CloseIndex < 0) + { + EmitLiteral(ctx, ctx.CurrentToken.Value); + ctx.Cursor.Move(); + return true; + } + + var nodes = GetChildren(ctx, ctx.CurrentTokenPos + 1, ctx.CloseIndex); + ctx.Children.Add(NodeFactory.Create(ctx.CurrentNodeType, null, nodes)); + ctx.Cursor.Move(ctx.CloseIndex - ctx.CurrentTokenPos + 1); + return true; + } + + private static bool HandleWithExplicitCloser(ScoreContext ctx) + { + var start = ctx.CurrentTokenPos + 1; + + if (HasCrossingPairs(ctx)) + { + EmitLiteral(ctx, ctx.CurrentToken.Value); + ctx.Children.Add(NodeFactory.Create( + NodeType.Text, null, GetChildren(ctx, start, ctx.CloseIndex))); + EmitLiteral(ctx, ctx.CurrentToken.Value); + ctx.Cursor.Move(ctx.CloseIndex - ctx.CurrentTokenPos + 1); + return true; + } + + switch (ctx.CurrentTokenType) + { + case TokenType.Underscore: + { + var node = NodeFactory.Create( + NodeType.Emphasis, null, GetChildren(ctx, start, ctx.CloseIndex)); + ctx.Children.Add(node); + ctx.Cursor.Move(ctx.CloseIndex - ctx.CurrentTokenPos + 1); + return true; + } + case TokenType.DoubleUnderscore: + { + var childTokens = ctx.Cursor.tokens.Slice( + ctx.CurrentTokenPos + 1, ctx.CloseIndex - ctx.CurrentTokenPos); + var childNode = new NodeGenerator().Create(childTokens); + ctx.Children.Add(NodeFactory.Create(ctx.CurrentNodeType, null, childNode)); + ctx.Cursor.Move(ctx.CloseIndex - ctx.CurrentTokenPos + 1); + return true; + } + default: + return false; + } + } + + private static int FindCloseIndex(ScoreContext ctx) + { + var closeIndex = -1; + + for (var i = ctx.CurrentTokenPos + 1; i < ctx.Cursor.TokenCount; i++) + { + var t = ctx.Cursor.tokens[i]; + + if (t.Type is TokenType.NewLine or TokenType.EndOfText) + break; + + if (t.Type == ctx.CurrentTokenType) + { + var left = ctx.Cursor.tokens.ElementAtOrDefault(i - 1); + var right = ctx.Cursor.tokens.ElementAtOrDefault(i + 1); + if (left?.Type != TokenType.WhiteSpace && + right?.Type is TokenType.WhiteSpace or TokenType.EndOfText) + { + closeIndex = i; + break; + } + } + + if (t.Type == TokenType.Underscore || t.Type == TokenType.DoubleUnderscore) + ctx.InnerScore.Add((i, t.Type)); + } + + return closeIndex; + } + + private static bool HasCrossingPairs(ScoreContext ctx) + { + var scoreType = ctx.CurrentTokenType == TokenType.Underscore + ? TokenType.DoubleUnderscore + : TokenType.Underscore; + + foreach (var u in ctx.InnerScore.Where(x => x.Type == scoreType)) + { + for (var j = u.Index + 1; j < ctx.Cursor.TokenCount; j++) + { + var t = ctx.Cursor.tokens[j]; + if (t.Type is TokenType.NewLine or TokenType.EndOfText) + break; + + if (t.Type != scoreType) continue; + var left = ctx.Cursor.tokens.ElementAtOrDefault(j - 1); + if (left?.Type == TokenType.WhiteSpace) continue; + if (j > ctx.CloseIndex) + return true; + break; + } + } + + return false; + } + + private static List GetChildren(ScoreContext ctx, int start, int end) + { + var slice = new List(); + for (var i = start; i < end; i++) + { + var t = ctx.Cursor.tokens[i]; + slice.Add(NodeFactory.Create(NodeType.Text, t.Value, null)); + } + + return slice; + } + + private static int FindFallbackCloser(ScoreContext ctx) + { + for (var i = ctx.CurrentTokenPos + 1; i < ctx.Cursor.TokenCount; i++) + { + var token = ctx.Cursor.tokens[i]; + + if (token.Type == TokenType.WhiteSpace) + return -1; + + if (token.Type == ctx.CurrentTokenType) + return i; + } + + return -1; + } + + private static void EmitLiteral(ScoreContext ctx, string value) => + ctx.Children.Add(NodeFactory.Create(NodeType.Text, value, null)); +} diff --git a/cs/Markdown/Node/ScoreRule/ScoreUtils.cs b/cs/Markdown/Node/ScoreRule/ScoreUtils.cs new file mode 100644 index 000000000..5d27cd2d1 --- /dev/null +++ b/cs/Markdown/Node/ScoreRule/ScoreUtils.cs @@ -0,0 +1,17 @@ +namespace Markdown; + +public static class ScoreUtils +{ + public static bool IsWhiteSpaceOrEndToken(Token? token) => + token is { Type: TokenType.WhiteSpace } or { Type: TokenType.EndOfText }; + + public static bool TokenBetweenDigits(Token? previousToken, Token? nextToken) + { + var leftTokenIsDigit = previousToken is { Type: TokenType.Text, Value.Length: > 0 } + && char.IsDigit(previousToken.Value[^1]); + var rightTokenIsDigit = nextToken is { Type: TokenType.Text, Value.Length: > 0 } + && char.IsDigit(nextToken.Value[0]); + return leftTokenIsDigit && rightTokenIsDigit; + } + +} diff --git a/cs/Markdown/Rendering.cs b/cs/Markdown/Rendering.cs index dbf35fd9e..4e23bfc5d 100644 --- a/cs/Markdown/Rendering.cs +++ b/cs/Markdown/Rendering.cs @@ -13,12 +13,12 @@ public interface IRenderer public class HtmlRenderer : IRenderer { - public string Render(IEnumerable nodes) + public string Render(IEnumerable? nodes) { if (nodes == null) return string.Empty; var sb = new StringBuilder(); - + foreach (var node in nodes) { RenderNode(node, sb); @@ -27,7 +27,7 @@ public string Render(IEnumerable nodes) return sb.ToString(); } - private static void RenderNode(Node node, StringBuilder sb) + private static void RenderNode(Node? node, StringBuilder sb) { if (node == null) return; @@ -42,11 +42,8 @@ private static void RenderNode(Node node, StringBuilder sb) case NodeType.Strong: sb.Append(""); - if (node.Children != null) - foreach (var child in node.Children) - RenderNode(child, sb); - else - sb.Append(node.Value); + foreach (var child in node.Children) + RenderNode(child, sb); sb.Append(""); break; @@ -66,7 +63,7 @@ private static void RenderNode(Node node, StringBuilder sb) case NodeType.Text: default: - var text = WebUtility.HtmlEncode(node.Value ?? string.Empty); + var text = WebUtility.HtmlEncode(node.Value); sb.Append(text); if (node.Children != null && node.Children.Count > 0) RenderInlineChildren(node.Children, sb); @@ -74,11 +71,10 @@ private static void RenderNode(Node node, StringBuilder sb) } } - private static void RenderInlineChildren(List children, System.Text.StringBuilder sb) + private static void RenderInlineChildren(List? children, StringBuilder sb) { if (children == null || children.Count == 0) return; - // Соседние Plain можно слить ещё на этапе генерации; здесь просто обходим foreach (var child in children) RenderNode(child, sb); } From 5dfdb2a926e2c465d3a1b69ad3e56c93dc1702cf Mon Sep 17 00:00:00 2001 From: MihailK Date: Sun, 23 Nov 2025 18:01:50 +0500 Subject: [PATCH 13/13] =?UTF-8?q?=D0=94=D0=BE=D0=B4=D0=B5=D0=BB=D0=B0?= =?UTF-8?q?=D0=BB=20=D0=BF=D1=80=D0=B0=D0=B2=D0=B8=D0=BB=D1=8C=D0=BD=D1=83?= =?UTF-8?q?=D1=8E=20=D0=BE=D0=B1=D1=80=D0=B0=D0=B1=D0=BE=D1=82=D0=BA=D1=83?= =?UTF-8?q?=20md=20=D1=81=D1=81=D1=8B=D0=BB=D0=BA=D0=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/MarkdownTest/RenderTest.cs | 41 ++++-- cs/Markdown/Rendering.cs | 15 ++- .../Token/TokenCreateRules/DigitRule.cs | 2 +- .../Token/TokenCreateRules/EscapeRule.cs | 4 +- .../Token/TokenCreateRules/HeaderRule.cs | 2 +- .../Token/TokenCreateRules/LinkRule.cs | 119 +++++++----------- .../Token/TokenCreateRules/NewLineRule.cs | 2 +- .../Token/TokenCreateRules/TextRunRule.cs | 2 +- cs/Markdown/Token/TokenCreateRules/URL.cs | 105 ++++++++++++++++ .../Token/TokenCreateRules/UnderscoreRule.cs | 4 +- .../Token/TokenCreateRules/WhiteSpaceRule.cs | 2 +- cs/Markdown/Token/TokenFactory.cs | 2 +- cs/Markdown/Token/TokenGenerator.cs | 12 +- 13 files changed, 210 insertions(+), 102 deletions(-) create mode 100644 cs/Markdown/Token/TokenCreateRules/URL.cs diff --git a/cs/Markdown/MarkdownTest/RenderTest.cs b/cs/Markdown/MarkdownTest/RenderTest.cs index 2ea983f86..ed7f2cc26 100644 --- a/cs/Markdown/MarkdownTest/RenderTest.cs +++ b/cs/Markdown/MarkdownTest/RenderTest.cs @@ -95,16 +95,43 @@ public void Render_ShouldEscapeSpecialMarkdownCharacters_WhenEscapesPresent(stri html.Should().Be(expected); } - [TestCase("Ссылка: https://ulearn.me", "Ссылка: ", TestName = "https распознаётся, как ссылка")] - [TestCase("Ссылка: www.example.org", "Ссылка: ", TestName = "www распознаётся, как ссылка")] - [TestCase("Ссылка: example.com", "Ссылка: ", TestName = "example.com распознаётся_как_ссылка")] - [TestCase("Несколько ссылок: https://a.ru и http://b.com", - "Несколько ссылок: и ", - TestName = "Несколько ссылок распознаются")] - [TestCase("Это не ссылка: .com", "Это не ссылка: .com", TestName = "Строка начинающаяся с точки com не считается ссылкой")] + + [TestCase( + "Ссылка: [text](https://example.com \"Подсказка\") просто текст", + "Ссылка: text просто текст", + TestName = "Преобразование ссылки из Markdown в HTML")] + [TestCase( + "Ссылка: [text]https://example.com просто текст", + "Ссылка: [text]https://example.com просто текст", + TestName = "Без скобок после текста — это не ссылка")] + [TestCase( + "Ссылка: [text](https://example.com просто текст", + "Ссылка: [text](https://example.com просто текст", + TestName = "Нет закрывающей скобки — это не ссылка")] + [TestCase( + "[Тут ссылка](https://site.ru) и просто текст", + "Тут ссылка и просто текст", + TestName = "Классическая ссылка без подсказки")] + [TestCase( + "Вот [текст](https://example.com \"url и заголовок\") внутри строки", + "Вот текст внутри строки", + TestName = "Ссылка в середине строки")] + [TestCase( + "[text](https://)", + "[text](https://)", + TestName = "Пустой url в ссылке не считается ссылкой")] + [TestCase( + "[text]( )", + "[text]( )", + TestName = "Пустые скобки для url — ошибка")] + [TestCase( + "Обычный текст без ссылки", + "Обычный текст без ссылки", + TestName = "Без ссылок — простой текст")] public void Render_ShouldRecognizeAndLinkifyUrls_WhenLinksArePresent(string text, string expected) { var html = md.Render(text); html.Should().Be(expected); } + } \ No newline at end of file diff --git a/cs/Markdown/Rendering.cs b/cs/Markdown/Rendering.cs index 4e23bfc5d..3f033fff6 100644 --- a/cs/Markdown/Rendering.cs +++ b/cs/Markdown/Rendering.cs @@ -56,11 +56,22 @@ private static void RenderNode(Node? node, StringBuilder sb) sb.Append(node.Value); sb.Append(""); break; - + case NodeType.Link: - sb.Append(""); + var parts = node.Value?.Split('|'); + if (parts == null || parts.Length < 2) + return; + + var linkText = WebUtility.HtmlEncode(parts[0]); + var url = WebUtility.HtmlEncode(parts[1]); + var title = (parts.Length > 2 && !string.IsNullOrWhiteSpace(parts[2])) + ? $" title=\"{WebUtility.HtmlEncode(parts[2])}\"" + : ""; + + sb.Append($"{linkText}"); break; + case NodeType.Text: default: var text = WebUtility.HtmlEncode(node.Value); diff --git a/cs/Markdown/Token/TokenCreateRules/DigitRule.cs b/cs/Markdown/Token/TokenCreateRules/DigitRule.cs index b428fe31a..e2011d99c 100644 --- a/cs/Markdown/Token/TokenCreateRules/DigitRule.cs +++ b/cs/Markdown/Token/TokenCreateRules/DigitRule.cs @@ -17,6 +17,6 @@ public class DigitRule : ITokenRule } var value = cursor.Slice(start, cursor.Position); - return TokenFactory.Create(TokenType.WhiteSpace, value, start); + return TokenFactory.Create(TokenType.WhiteSpace, value); } } \ No newline at end of file diff --git a/cs/Markdown/Token/TokenCreateRules/EscapeRule.cs b/cs/Markdown/Token/TokenCreateRules/EscapeRule.cs index 5b365ded1..e5fed116f 100644 --- a/cs/Markdown/Token/TokenCreateRules/EscapeRule.cs +++ b/cs/Markdown/Token/TokenCreateRules/EscapeRule.cs @@ -13,9 +13,9 @@ public sealed class EscapeRule : ITokenRule { var s = cursor.Current.ToString(); cursor.Move(1); - return TokenFactory.Create(TokenType.Text, s, pos); + return TokenFactory.Create(TokenType.Text, s); } - return TokenFactory.Create(TokenType.Text, "\\", pos); + return TokenFactory.Create(TokenType.Text, "\\"); } } \ No newline at end of file diff --git a/cs/Markdown/Token/TokenCreateRules/HeaderRule.cs b/cs/Markdown/Token/TokenCreateRules/HeaderRule.cs index d1596ae3f..a80156399 100644 --- a/cs/Markdown/Token/TokenCreateRules/HeaderRule.cs +++ b/cs/Markdown/Token/TokenCreateRules/HeaderRule.cs @@ -10,7 +10,7 @@ public sealed class HeaderRule : ITokenRule { var currentPos = cursor.Position; cursor.Move(1); - return TokenFactory.Create(TokenType.HeaderMarker, "#", currentPos); + return TokenFactory.Create(TokenType.HeaderMarker, "#"); } return null; diff --git a/cs/Markdown/Token/TokenCreateRules/LinkRule.cs b/cs/Markdown/Token/TokenCreateRules/LinkRule.cs index 0077f8344..36082d6c6 100644 --- a/cs/Markdown/Token/TokenCreateRules/LinkRule.cs +++ b/cs/Markdown/Token/TokenCreateRules/LinkRule.cs @@ -2,104 +2,71 @@ public class LinkRule : ITokenRule { - private static readonly string[] ValidLinkPrefixes = - [ - "http://", - "https://", - "www." - ]; - public Token? TryReadTokenAndMoveCursor(TextCursor cursor) { - if (cursor.End) + var linkStart = cursor.Position; + if (cursor.Current != '[') return null; - var start = cursor.Position; - - if (!IsLinkStart(cursor)) - { - cursor.Revert(start); + var linkTextEnd = FindChar(cursor, ']'); + if (linkTextEnd == -1) return null; + var linkText = cursor.Slice(linkStart + 1, linkTextEnd); + cursor.Move(1); + + var parenStart = cursor.Position; + if (cursor.Current != '(') + { + return TokenFactory.Create(TokenType.Text, cursor.Slice(linkStart, linkTextEnd + 1)); } - - while (!cursor.End && IsLinkChar(cursor.Current)) - cursor.Move(); - var end = cursor.Position; - - var value = cursor.Slice(start, end); + var urlEnd = FindChar(cursor, ')'); + if (urlEnd == -1) + { + cursor.Revert(parenStart); + return TokenFactory.Create(TokenType.Text, cursor.Slice(linkStart, linkTextEnd + 1)); + } - if (!IsValidLinkStart(value)) + cursor.Revert(parenStart + 1); + if (!URL.ItsUrl(cursor)) { - cursor.Revert(start); + cursor.Revert(linkStart); return null; } - return TokenFactory.Create(TokenType.Link, value, start); - } + var linkArgs = cursor.Slice(parenStart + 1, urlEnd); + var url = linkArgs; + var title = ""; - private static bool IsLinkStart(TextCursor cursor) - { - var mark = cursor.Position; - - if (ValidLinkPrefixes.Any(cursor.Matches)) - { - cursor.Revert(mark); - return true; - } - - if (char.IsLetterOrDigit(cursor.Current)) + var titleStart = linkArgs.IndexOf('"'); + if (titleStart != -1) { - var pos = cursor.Position; - var hasDot = false; - - while (pos < cursor.Length && !char.IsWhiteSpace(cursor.Peek(pos - cursor.Position))) + url = linkArgs[..titleStart].Trim(); + int titleEnd = linkArgs.LastIndexOf('"'); + if (titleEnd > titleStart) { - if (cursor.Peek(pos - cursor.Position) == '.') - { - hasDot = true; - break; - } - pos++; + title = linkArgs.Substring(titleStart + 1, titleEnd - titleStart - 1); } - - cursor.Revert(mark); - if (hasDot) - return true; - } - else - { - cursor.Revert(mark); } - return false; + if (title.Length > 0) + cursor.Move(title.Length + 3); + cursor.Move(1); + + var tokenValue = $"{linkText}|{url}|{title}"; + return TokenFactory.Create(TokenType.Link, tokenValue); } - private static bool IsLinkChar(char c) + private static int FindChar(TextCursor cursor, char targetChar) { - if (char.IsWhiteSpace(c)) - return false; - - switch (c) + while (!cursor.End) { - case ')': - case ']': - case '<': - case '>': - case '\"': - return false; + cursor.Move(1); + if (cursor.Current == targetChar) + { + return cursor.Position; + } } - - return true; - } - - private static bool IsValidLinkStart(string value) - { - if (ValidLinkPrefixes.Any(prefix => string.Equals(value, prefix, StringComparison.OrdinalIgnoreCase))) - return false; - - var dotIndex = value.IndexOf('.'); - return dotIndex > 0 && dotIndex != value.Length - 1; + return -1; } - } diff --git a/cs/Markdown/Token/TokenCreateRules/NewLineRule.cs b/cs/Markdown/Token/TokenCreateRules/NewLineRule.cs index c83e55374..84dab6827 100644 --- a/cs/Markdown/Token/TokenCreateRules/NewLineRule.cs +++ b/cs/Markdown/Token/TokenCreateRules/NewLineRule.cs @@ -9,7 +9,7 @@ public Token TryReadTokenAndMoveCursor(TextCursor cursor) { var pos = cursor.Position; cursor.Move(1); - return TokenFactory.Create(TokenType.NewLine, "\n", pos); + return TokenFactory.Create(TokenType.NewLine, "\n"); } if (cursor.Current == '\r') diff --git a/cs/Markdown/Token/TokenCreateRules/TextRunRule.cs b/cs/Markdown/Token/TokenCreateRules/TextRunRule.cs index 540f25e90..5594dc9e1 100644 --- a/cs/Markdown/Token/TokenCreateRules/TextRunRule.cs +++ b/cs/Markdown/Token/TokenCreateRules/TextRunRule.cs @@ -28,7 +28,7 @@ public Token TryReadTokenAndMoveCursor(TextCursor cursor) } if (cursor.Position > start) - return TokenFactory.Create(TokenType.Text, cursor.Slice(start, cursor.Position), start); + return TokenFactory.Create(TokenType.Text, cursor.Slice(start, cursor.Position)); return null; } diff --git a/cs/Markdown/Token/TokenCreateRules/URL.cs b/cs/Markdown/Token/TokenCreateRules/URL.cs new file mode 100644 index 000000000..6f691cabb --- /dev/null +++ b/cs/Markdown/Token/TokenCreateRules/URL.cs @@ -0,0 +1,105 @@ +namespace Markdown; + +public static class URL +{ + private static readonly string[] ValidLinkPrefixes = + [ + "http://", + "https://", + "www." + ]; + + public static bool ItsUrl(TextCursor cursor) + { + if (cursor.End) + return false; + + var start = cursor.Position; + + if (!IsLinkStart(cursor)) + { + cursor.Revert(start); + return false; + } + + while (!cursor.End && IsLinkChar(cursor.Current)) + cursor.Move(); + + var end = cursor.Position; + + var value = cursor.Slice(start, end); + + if (!IsValidLinkStart(value)) + { + cursor.Revert(start); + return false; + } + + return true; + } + + private static bool IsLinkStart(TextCursor cursor) + { + var mark = cursor.Position; + + if (ValidLinkPrefixes.Any(cursor.Matches)) + { + cursor.Revert(mark); + return true; + } + + if (char.IsLetterOrDigit(cursor.Current)) + { + var pos = cursor.Position; + var hasDot = false; + + while (pos < cursor.Length && !char.IsWhiteSpace(cursor.Peek(pos - cursor.Position))) + { + if (cursor.Peek(pos - cursor.Position) == '.') + { + hasDot = true; + break; + } + pos++; + } + + cursor.Revert(mark); + if (hasDot) + return true; + } + else + { + cursor.Revert(mark); + } + + return false; + } + + private static bool IsLinkChar(char c) + { + if (char.IsWhiteSpace(c)) + return false; + + switch (c) + { + case ')': + case ']': + case '<': + case '>': + case '\"': + return false; + } + + return true; + } + + private static bool IsValidLinkStart(string value) + { + if (ValidLinkPrefixes.Any(prefix => string.Equals(value, prefix, StringComparison.OrdinalIgnoreCase))) + return false; + + var dotIndex = value.IndexOf('.'); + return dotIndex > 0 && dotIndex != value.Length - 1; + } + +} diff --git a/cs/Markdown/Token/TokenCreateRules/UnderscoreRule.cs b/cs/Markdown/Token/TokenCreateRules/UnderscoreRule.cs index 65b637ac3..c63146d7d 100644 --- a/cs/Markdown/Token/TokenCreateRules/UnderscoreRule.cs +++ b/cs/Markdown/Token/TokenCreateRules/UnderscoreRule.cs @@ -19,9 +19,9 @@ public Token TryReadTokenAndMoveCursor(TextCursor cursor) if (count >= 2) { cursor.Revert(pos + 2); - return TokenFactory.Create(TokenType.DoubleUnderscore, "__", pos); + return TokenFactory.Create(TokenType.DoubleUnderscore, "__"); } - return TokenFactory.Create(TokenType.Underscore, "_", pos); + return TokenFactory.Create(TokenType.Underscore, "_"); } } \ No newline at end of file diff --git a/cs/Markdown/Token/TokenCreateRules/WhiteSpaceRule.cs b/cs/Markdown/Token/TokenCreateRules/WhiteSpaceRule.cs index e3a308772..bedf5df62 100644 --- a/cs/Markdown/Token/TokenCreateRules/WhiteSpaceRule.cs +++ b/cs/Markdown/Token/TokenCreateRules/WhiteSpaceRule.cs @@ -17,6 +17,6 @@ public sealed class WhiteSpaceRule : ITokenRule } var value = cursor.Slice(start, cursor.Position); - return TokenFactory.Create(TokenType.WhiteSpace,value, start); + return TokenFactory.Create(TokenType.WhiteSpace,value); } } \ No newline at end of file diff --git a/cs/Markdown/Token/TokenFactory.cs b/cs/Markdown/Token/TokenFactory.cs index f0923c6b8..4de23f5af 100644 --- a/cs/Markdown/Token/TokenFactory.cs +++ b/cs/Markdown/Token/TokenFactory.cs @@ -2,6 +2,6 @@ public static class TokenFactory { - public static Token Create(TokenType type, string? info, int pos) + public static Token Create(TokenType type, string? info) => new Token { Type = type, Value = info }; } \ No newline at end of file diff --git a/cs/Markdown/Token/TokenGenerator.cs b/cs/Markdown/Token/TokenGenerator.cs index 2634457b1..a4a6f59b8 100644 --- a/cs/Markdown/Token/TokenGenerator.cs +++ b/cs/Markdown/Token/TokenGenerator.cs @@ -1,11 +1,9 @@ -using System.Collections.Generic; - -namespace Markdown; +namespace Markdown; public class TokenGenerator : ITokenGenerator { - private readonly List rules = new() - { + private readonly List rules = + [ new LinkRule(), new NewLineRule(), new WhiteSpaceRule(), @@ -14,7 +12,7 @@ public class TokenGenerator : ITokenGenerator new UnderscoreRule(), new TextRunRule(), new DigitRule() - }; + ]; public List Tokenize(string text) { @@ -28,7 +26,7 @@ public List Tokenize(string text) var token = TryCreateToken(cursor); tokens.Add(token); } - tokens.Add(TokenFactory.Create(TokenType.EndOfText, null, text.Length)); + tokens.Add(TokenFactory.Create(TokenType.EndOfText, null)); return tokens; }