Forráskód Böngészése

Parser implementation with lexer state machine

lord-executor 8 éve
szülő
commit
45cbc3b17e

+ 1 - 1
Parser.Tests/Integration/ParserIntegrationTests.cs

@@ -45,7 +45,7 @@ namespace Larkdown.Parser.Tests.Integration
 			var ast = parser.Parse(source);
 
 			ast.Nodes.Should().HaveCount(3);
-			ast.Nodes.Should().AllBeOfType<ParagraphNode>();
+			ast.Nodes.Should().AllBeOfType<TextNode>();
 		}
     }
 }

+ 17 - 0
Parser/Ast/BlockNode.cs

@@ -0,0 +1,17 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace Larkdown.Parser.Ast
+{
+	public abstract class BlockNode : Node // Abstract base for AST nodes that carry an indentation value.
+	{
+		public int Indent { get; } // Get-only: assigned once in the constructor and not changed afterwards.
+
+		protected BlockNode(string type, int indent) // Protected: only derived node types can invoke this constructor.
+			: base(type) // Forwards the node type name to the Node base constructor.
+		{
+			Indent = indent;
+		}
+    }
+}

+ 4 - 4
Parser/Ast/ParagraphNode.cs → Parser/Ast/TextNode.cs

@@ -4,17 +4,17 @@ using System.Text;
 
 namespace Larkdown.Parser.Ast
 {
-    public class ParagraphNode : Node
+    public class TextNode : BlockNode
     {
 		public string Text { get; private set; }
 
-		public ParagraphNode()
-			: base(nameof(ParagraphNode))
+		public TextNode(int indent)
+			: base(nameof(TextNode), indent)
 		{
 			Text = String.Empty;
 		}
 
-		public ParagraphNode AddText(string text)
+		public TextNode AddText(string text)
 		{
 			Text += text;
 			return this;

+ 1 - 4
Parser/Lexer/LineStartState.cs

@@ -10,10 +10,7 @@ namespace Larkdown.Parser.Lexer
 		public override State Next(Lexer lexer)
 		{
 			var indent = lexer.Read(new Regex("^[\t ]*"));
-			if (indent.Length > 0)
-			{
-				lexer.Emit(new Token(TokenType.Indentation, indent));
-			}
+			lexer.Emit(new Token(TokenType.Indentation, indent));
 
 			if (lexer.IsEof())
 			{

+ 5 - 0
Parser/Lexer/TextState.cs

@@ -12,6 +12,11 @@ namespace Larkdown.Parser.Lexer
 			var text = lexer.Read(new Regex("\\G.*"));
 			lexer.Emit(new Token(TokenType.Text, text));
 
+			if (lexer.Peek() == '\n')
+			{
+				lexer.Read();
+			}
+
 			return new LineStartState();
 		}
 	}

+ 28 - 4
Parser/Parser.cs

@@ -16,17 +16,41 @@ namespace Larkdown.Parser
 		{
 			var ast = new Ast.Ast();
 			var lexer = new Lexer.Lexer(source);
-			var node = new ParagraphNode();
+			var node = new TextNode(0);
 
 			foreach (var token in lexer.Tokens())
 			{
-				if (token.Type == TokenType.Text)
+				switch (token.Type)
 				{
-					node.AddText(token.Content);
+					case TokenType.Empty:
+						if (!node.IsEmpty())
+						{
+							ast.Add(node);
+						}
+						node = new TextNode(0);
+						break;
+
+					case TokenType.Indentation:
+						if (token.Content.Length != node.Indent)
+						{
+							if (!node.IsEmpty())
+							{
+								ast.Add(node);
+							}
+							node = new TextNode(token.Content.Length);
+						}
+						break;
+
+					case TokenType.Text:
+						node.AddText(token.Content);
+						break;
 				}
 			}
 
-			ast.Add(node);
+			if (!node.IsEmpty())
+			{
+				ast.Add(node);
+			}
 
 			return ast;
 		}