Procházet zdrojové kódy

Started refactoring to lexer state machine

lord-executor před 8 roky
rodič
revize
7e1c9f0355

+ 1 - 1
Parser/Ast/ParagraphNode.cs

@@ -16,7 +16,7 @@ namespace Larkdown.Parser.Ast
 
 		/// <summary>
 		/// Appends <paramref name="text"/> to this paragraph's Text and returns this node so calls can be chained.
 		/// With this change the text is appended as-is; the removed line inserted a single space before it.
 		/// </summary>
 		public ParagraphNode AddText(string text)
 		{
-			Text += " " + text;
+			Text += text;
 			return this;
 		}
 

+ 68 - 0
Parser/Lexer/Lexer.cs

@@ -0,0 +1,68 @@
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace Larkdown.Parser.Lexer
+{
+    /// <summary>
+    /// Turns a source string into a stream of <see cref="Token"/>s by running a state machine:
+    /// each <see cref="State"/> consumes input through the Read/Peek helpers, emits tokens via
+    /// <see cref="Emit"/> and returns the state to run next (or null to stop).
+    /// </summary>
+    public class Lexer
+    {
+		// Matches every line-break variant so the source can be normalized to "\n" once.
+		private static readonly Regex LineBreakPattern = new Regex(@"\r\n|\n\r|\n|\r");
+
+		private readonly string _source;
+		private readonly Queue<Token> _buffer;
+		private int _position = 0;
+
+		/// <summary>
+		/// Creates a lexer over <paramref name="source"/>; all line breaks are normalized to "\n".
+		/// </summary>
+		public Lexer(string source)
+		{
+			_source = LineBreakPattern.Replace(source, "\n");
+			_buffer = new Queue<Token>();
+		}
+
+		/// <summary>
+		/// Lazily runs the state machine, starting in <see cref="LineStartState"/>, and yields the
+		/// tokens each state emitted. Enumeration ends once a state returns null. The read position
+		/// is not reset, so a second enumeration continues from where the first one stopped.
+		/// </summary>
+		public IEnumerable<Token> Tokens()
+		{
+			State state = new LineStartState();
+
+			while (state != null)
+			{
+				state = state.Next(this);
+
+				// Flush everything the state emitted before running the next state.
+				while (_buffer.Count > 0)
+				{
+					yield return _buffer.Dequeue();
+				}
+			}
+		}
+
+		/// <summary>Queues <paramref name="token"/> for output and returns this lexer for chaining.</summary>
+		public Lexer Emit(Token token)
+		{
+			_buffer.Enqueue(token);
+			return this;
+		}
+
+		/// <summary>
+		/// Returns the character at the current position and advances past it.
+		/// Callers must check <see cref="IsEof"/> first; reading at the end of the source throws.
+		/// </summary>
+		public char Read()
+		{
+			return _source[_position++];
+		}
+
+		/// <summary>
+		/// Matches <paramref name="exp"/> at the current position and consumes the matched text.
+		/// </summary>
+		/// <param name="exp">
+		/// Pattern to match. Anchor it with \G: with a start offset, ^ still refers to index 0 of
+		/// the whole source, not to the current position.
+		/// </param>
+		/// <returns>The matched text, or an empty string (position unchanged) when there is no match at the current position.</returns>
+		public string Read(Regex exp)
+		{
+			var match = exp.Match(_source, _position);
+
+			// Only accept a match that starts exactly at the cursor. An unanchored pattern may
+			// match further ahead; advancing by its length from here would skip or split input
+			// while returning text that was never at the cursor.
+			if (!match.Success || match.Index != _position)
+			{
+				return String.Empty;
+			}
+
+			_position += match.Length;
+			return match.Value;
+		}
+
+		/// <summary>
+		/// Returns the character at the current position without consuming it.
+		/// Callers must check <see cref="IsEof"/> first; peeking at the end of the source throws.
+		/// </summary>
+		public char Peek()
+		{
+			return _source[_position];
+		}
+
+		/// <summary>Returns true once the whole source has been consumed.</summary>
+		public bool IsEof()
+		{
+			return _position >= _source.Length;
+		}
+	}
+}

+ 36 - 0
Parser/Lexer/LineStartState.cs

@@ -0,0 +1,36 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace Larkdown.Parser.Lexer
+{
+	/// <summary>
+	/// Lexer state for the beginning of a line: emits an Indentation token for leading tabs and
+	/// spaces, then an Eof token at the end of the source, an Empty token when the rest of the
+	/// line is just a line break, or hands over to <see cref="TextState"/> otherwise.
+	/// </summary>
+	class LineStartState : State
+	{
+		// \G anchors the match at the lexer's current position. The previous "^" anchor only
+		// ever matches at index 0 of the whole source (Regex.Match with a start offset does not
+		// move it), so indentation on every line after the first was silently missed.
+		private static readonly Regex IndentPattern = new Regex("\\G[\t ]*");
+
+		/// <summary>
+		/// Consumes leading whitespace and decides how the line continues.
+		/// </summary>
+		/// <returns>null after emitting Eof, this state after an empty line, otherwise a new <see cref="TextState"/>.</returns>
+		public override State Next(Lexer lexer)
+		{
+			var indent = lexer.Read(IndentPattern);
+			if (indent.Length > 0)
+			{
+				lexer.Emit(new Token(TokenType.Indentation, indent));
+			}
+
+			if (lexer.IsEof())
+			{
+				lexer.Emit(new Token(TokenType.Eof));
+				return null;
+			}
+
+			if (lexer.Peek() == '\n')
+			{
+				// Nothing but (optional) indentation on this line: report it and stay at line start.
+				lexer.Emit(new Token(TokenType.Empty));
+				lexer.Read();
+				return this;
+			}
+
+			return new TextState();
+		}
+	}
+}

+ 11 - 0
Parser/Lexer/State.cs

@@ -0,0 +1,11 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace Larkdown.Parser.Lexer
+{
+    /// <summary>
+    /// Base class for the states of the lexer state machine.
+    /// </summary>
+    internal abstract class State
+    {
+		/// <summary>
+		/// Performs this state's work on <paramref name="lexer"/> and returns the state to run
+		/// next; the lexer stops its token loop when null is returned.
+		/// </summary>
+		public abstract State Next(Lexer lexer);
+    }
+}

+ 18 - 0
Parser/Lexer/TextState.cs

@@ -0,0 +1,18 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace Larkdown.Parser.Lexer
+{
+	/// <summary>
+	/// Lexer state that reads the remainder of the current line as a single Text token.
+	/// </summary>
+	class TextState : State
+	{
+		// "." does not match "\n", so this consumes everything up to (not including) the line break.
+		private static readonly Regex LinePattern = new Regex("\\G.*");
+
+		/// <summary>
+		/// Emits the rest of the line as a Text token, consumes the line break that ends it and
+		/// returns a fresh <see cref="LineStartState"/> for the following line.
+		/// </summary>
+		public override State Next(Lexer lexer)
+		{
+			var text = lexer.Read(LinePattern);
+			lexer.Emit(new Token(TokenType.Text, text));
+
+			// Consume this line's own terminator. Left in place, LineStartState would see the
+			// "\n" and report an Empty token after every text line, making a plain line break
+			// indistinguishable from a genuinely blank line.
+			if (!lexer.IsEof() && lexer.Peek() == '\n')
+			{
+				lexer.Read();
+			}
+
+			return new LineStartState();
+		}
+	}
+}

+ 18 - 0
Parser/Lexer/Token.cs

@@ -0,0 +1,18 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace Larkdown.Parser.Lexer
+{
+    /// <summary>
+    /// A single lexer token: its <see cref="TokenType"/> plus optional text content.
+    /// </summary>
+    public class Token
+    {
+		/// <summary>
+		/// Creates a token of the given <paramref name="type"/>; <paramref name="content"/> is
+		/// null when not supplied.
+		/// </summary>
+		public Token(TokenType type, string content = null)
+		{
+			this.Type = type;
+			this.Content = content;
+		}
+
+		/// <summary>Kind of the token.</summary>
+		public TokenType Type { get; }
+
+		/// <summary>Text carried by the token, or null if none was given.</summary>
+		public string Content { get; }
+    }
+}

+ 14 - 0
Parser/Lexer/TokenType.cs

@@ -0,0 +1,14 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace Larkdown.Parser.Lexer
+{
+    /// <summary>
+    /// Kinds of tokens produced by the lexer.
+    /// </summary>
+    public enum TokenType
+    {
+		/// <summary>End of the source; also the default value.</summary>
+		Eof = 0,
+
+		/// <summary>A line that holds nothing besides optional indentation.</summary>
+		Empty = 1,
+
+		/// <summary>Leading tabs and spaces of a line.</summary>
+		Indentation = 2,
+
+		/// <summary>Text content of a line.</summary>
+		Text = 3,
+    }
+}

+ 4 - 13
Parser/Parser.cs

@@ -15,27 +15,18 @@ namespace Larkdown.Parser
 		/// <summary>
 		/// Lexes <paramref name="source"/> and builds the AST. With this change (the "+" lines) the
 		/// content of every Text token is appended to one ParagraphNode, which is then added to
 		/// the AST unconditionally; tokens of any other type are skipped.
 		/// </summary>
 		// NOTE(review): the removed "-" lines started a new paragraph on empty tokens and skipped
 		// an empty trailing node. The new loop does neither - presumably temporary while the
 		// lexer refactoring is in progress; confirm before relying on single-paragraph output.
 		public Ast.Ast Parse(string source)
 		{
 			var ast = new Ast.Ast();
-			var lexer = new BlockLexer(source);
+			var lexer = new Lexer.Lexer(source);
 			var node = new ParagraphNode();
-			BlockToken token;
 
-			while ((token = lexer.NextToken()) != null)
+			foreach (var token in lexer.Tokens())
 			{
-				if (String.IsNullOrEmpty(token.Content))
-				{
-					ast.Add(node);
-					node = new ParagraphNode();
-				}
-				else
+				if (token.Type == TokenType.Text)
 				{
 					node.AddText(token.Content);
 				}
 			}
 
-			if (!node.IsEmpty())
-			{
-				ast.Add(node);
-			}
+			ast.Add(node);
 
 			return ast;
 		}