using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.IO;
namespace Sloth
{
/// <summary>
/// Thrown by the <see cref="Lexer"/> when a lexeme does not match any tokenization rule.
/// </summary>
public class InvalidLexemeException : InterpreterException
{
/// <param name="message">Description of the invalid lexeme.</param>
/// <param name="lineNumber">1-based line number of the source line containing the lexeme.</param>
public InvalidLexemeException(String message, Int32 lineNumber) : base(message, lineNumber) { }
}
/// <summary>
/// Categories of tokens produced by the <see cref="Lexer"/>.
/// NOTE: member order is significant — it fixes the underlying ordinal values.
/// </summary>
public enum TokenType
{
Identifier,      // word, optionally prefixed with '#' and suffixed with .B/.W/.L
Label,           // word ending with ':'
Register,        // A0-A7 or D0-D7
LiteralConstant, // decimal digits or '$'-prefixed hex, optional .B/.W/.L suffix
OpenBracket,     // '('
ClosedBracket,   // ')'
Sign,            // '+' or '-'
Separator,       // ','
Sharp,           // '#'
NewLine,         // end of a source line
LiteralString,   // text enclosed in single quotes, or any lexeme matching no other rule
None             // sentinel used by Token.Empty
}
/// <summary>
/// A single lexical token: a <see cref="TokenType"/> paired with the raw lexeme text.
/// </summary>
[System.Diagnostics.DebuggerDisplay("{ToString()}")]
public struct Token : IEquatable<Token>
{
    private static Token empty = new Token(TokenType.None, String.Empty);

    /// <summary>Sentinel token (type None, empty lexeme) used to signal "no token".</summary>
    public static Token Empty { get { return empty; } }

    public TokenType Type { get; set; }
    public String Lexeme { get; set; }

    /// <param name="type">The token's category.</param>
    /// <param name="lexeme">The raw text of the token.</param>
    public Token(TokenType type, String lexeme) : this()
    {
        this.Type = type;
        this.Lexeme = lexeme;
    }

    public override string ToString()
    {
        return String.Format("[{0}, {1}]", this.Type.ToString(), this.Lexeme);
    }

    public bool Equals(Token other)
    {
        return (this.Lexeme == other.Lexeme) && (this.Type == other.Type);
    }

    // Fix: a type implementing IEquatable<T> must also override object.Equals and
    // GetHashCode, otherwise boxed comparisons and hash-based collections disagree
    // with the strongly-typed Equals above.
    public override bool Equals(object obj)
    {
        return (obj is Token) && this.Equals((Token)obj);
    }

    public override int GetHashCode()
    {
        unchecked
        {
            int hash = (int)this.Type * 397;
            return hash ^ (this.Lexeme != null ? this.Lexeme.GetHashCode() : 0);
        }
    }
}
/// <summary>Ordered sequence of tokens emitted by the <see cref="Lexer"/>.</summary>
public class TokenList : List<Token> { }
/// <summary>Maps a lexeme-matching regex to the token type it produces.</summary>
public class TokenizationRuleSet : Dictionary<Regex, TokenType> { }
/// <summary>Maps a single-character symbol to the token type it produces.</summary>
public class SymbolSet : Dictionary<Char, TokenType> { }
/// <summary>
/// Splits assembly-like source text into a flat <see cref="TokenList"/>.
/// Comments start at <see cref="commentChar"/> and run to end of line; string
/// literals are delimited by single quotes.
/// </summary>
public class Lexer
{
    protected Char commentChar;
    private Int32 lineNumber; // 1-based, advanced on each '\n'; reported in lexing errors

    protected TokenizationRuleSet Rules { get; set; }
    protected SymbolSet Symbols { get; set; }
    protected List<Char> IgnoredCharacters { get; set; }

    /// <summary>Tokens produced by the most recent call to <see cref="Tokenize"/>.</summary>
    public TokenList Output { get; protected set; }

    public Lexer()
    {
        this.Output = new TokenList();
        this.IgnoredCharacters = new List<Char>() { ' ', '\t', '\f', '\r', '\v' };
        this.commentChar = ';';
        this.Symbols = new SymbolSet()
        {
            { ',', TokenType.Separator },
            { '(', TokenType.OpenBracket },
            { ')', TokenType.ClosedBracket },
            { '+', TokenType.Sign },
            { '-', TokenType.Sign },
            { '#', TokenType.Sharp }
        };
        // Rules are tried first-to-last in ScanLexeme; the catch-all "^.+$" must stay last.
        // NOTE(review): this relies on Dictionary preserving insertion order, which the
        // runtime does not guarantee — a List of pairs would make precedence explicit.
        this.Rules = new TokenizationRuleSet()
        {
            { new Regex("^[\\w_]+:$", RegexOptions.IgnoreCase), TokenType.Label },
            { new Regex("^(A|D)[0-7]$", RegexOptions.IgnoreCase), TokenType.Register },
            { new Regex("^(\\d+|\\$[0-9A-F]+)(\\.(B|W|L))?$", RegexOptions.IgnoreCase), TokenType.LiteralConstant },
            { new Regex("^(#)?\\w+(\\.(B|W|L))?$", RegexOptions.IgnoreCase), TokenType.Identifier },
            { new Regex("^.+$"), TokenType.LiteralString }
        };
    }

    /// <summary>
    /// Tokenizes <paramref name="code"/> into <see cref="Output"/>, replacing any
    /// previous contents.
    /// </summary>
    /// <param name="code">Full source text to tokenize.</param>
    /// <exception cref="InvalidLexemeException">A lexeme matches no rule.</exception>
    public void Tokenize(String code)
    {
        StringBuilder buffer = new StringBuilder();
        this.Output.Clear();
        lineNumber = 1;

        String purgedCode = this.StripComments(code);

        Boolean stringOpen = false;
        foreach (Char c in purgedCode)
        {
            if (c == '\'')
            {
                stringOpen = !stringOpen;
                if (!stringOpen)
                {
                    // Closing quote: emit the buffered text as a string literal.
                    // The opening quote was appended to the buffer, so trim quotes here.
                    this.Output.Add(new Token(TokenType.LiteralString, buffer.ToString().Trim('\'')));
                    buffer.Clear();
                    continue;
                }
            }
            if (stringOpen)
            {
                // Inside a string literal: accumulate verbatim (whitespace, symbols and all).
                buffer.Append(c);
            }
            else
            {
                // Outside any string literal: split on whitespace, symbols and newlines.
                if (c == '\n')
                {
                    this.AddToken(buffer);
                    lineNumber++;
                    this.Output.Add(new Token(TokenType.NewLine, "\n"));
                }
                else if (this.IgnoredCharacters.Contains(c))
                {
                    this.AddToken(buffer); // whitespace terminates the pending lexeme
                }
                else
                {
                    // Fix: single dictionary lookup instead of ContainsKey + indexer.
                    TokenType symbolType;
                    if (this.Symbols.TryGetValue(c, out symbolType))
                    {
                        this.AddToken(buffer); // a symbol also terminates the pending lexeme
                        this.Output.Add(new Token(symbolType, c.ToString()));
                    }
                    else
                    {
                        buffer.Append(c);
                    }
                }
            }
        }
        this.AddToken(buffer); // flush a trailing lexeme when the source lacks a final newline
    }

    /// <summary>Reads the file at <paramref name="path"/> and tokenizes its contents.</summary>
    public void TokenizeFromFile(String path)
    {
        this.Tokenize(File.ReadAllText(path));
    }

    /// <summary>
    /// Removes comments: everything from <see cref="commentChar"/> to the end of the
    /// line is dropped; the newline itself is kept so line counting stays correct.
    /// NOTE(review): stripping happens before string literals are parsed, so a
    /// comment character inside a quoted string also starts a comment — confirm
    /// whether that is intended.
    /// </summary>
    private String StripComments(String code)
    {
        StringBuilder purged = new StringBuilder();
        Boolean commentOpen = false;
        foreach (Char c in code)
        {
            if (c == commentChar)
                commentOpen = true;
            else if (c == '\n')
                commentOpen = false;
            if (!commentOpen)
                purged.Append(c);
        }
        return purged.ToString();
    }

    /// <summary>
    /// Classifies <paramref name="buffer"/> against <see cref="Rules"/> in order.
    /// Returns <see cref="Token.Empty"/> for an empty buffer.
    /// </summary>
    /// <exception cref="InvalidLexemeException">No rule matches the lexeme.</exception>
    private Token ScanLexeme(String buffer)
    {
        if (String.IsNullOrEmpty(buffer))
            return Token.Empty;
        // Fix: iterate pairs directly instead of Keys + indexer (one lookup, not two).
        foreach (KeyValuePair<Regex, TokenType> rule in this.Rules)
            if (rule.Key.IsMatch(buffer))
                return new Token(rule.Value, buffer);
        throw new InvalidLexemeException(String.Format("Il lessema '{0}' non è valido.", buffer), lineNumber);
    }

    /// <summary>
    /// Scans the buffered lexeme, appends the resulting token to <see cref="Output"/>
    /// (unless the buffer was empty), then clears the buffer for reuse.
    /// </summary>
    private void AddToken(StringBuilder buffer)
    {
        String str = buffer.ToString();
        Token token = this.ScanLexeme(str);
        if (!token.Equals(Token.Empty))
            this.Output.Add(token);
        buffer.Clear();
    }
}
}