SupaLidlGame/Debug/Transpiler/Tokenizer.cs

183 lines
4.9 KiB
C#

using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
namespace SupaLidlGame.Debug.Transpiler;
/// <summary>
/// Lexer that turns the characters supplied by a <see cref="CharIterator"/>
/// into a sequence of <see cref="Token"/>s: grouping, operator, node path,
/// string, identifier and number tokens.
/// </summary>
public sealed class Tokenizer
{
    // The three public fields stay instance-level (not const/static) so that
    // existing callers reading them through a Tokenizer instance still compile.
    // DECIMAL_POINT and DECIMAL_SUBSEPARATOR are not referenced inside this class.
    public readonly char DECIMAL_POINT = '.';

    public readonly char DECIMAL_SUBSEPARATOR = ',';

    /// <summary>Character that introduces a node path token.</summary>
    public readonly char NODE_PATH_PREFIX = '$';

    // The lookup tables below are immutable after construction and only ever
    // read, so a single shared copy is enough for all Tokenizer instances.

    /// <summary>
    /// Characters skipped between tokens. Tab and carriage return are included
    /// so tab-separated input and CRLF line endings are not rejected as
    /// unknown symbols.
    /// </summary>
    private static readonly HashSet<char> WHITESPACE = new HashSet<char>
    {
        ' ',
        '\t',
        '\r',
        '\n',
    };

    /// <summary>Characters emitted as single-character operator tokens.</summary>
    private static readonly HashSet<char> OPERATOR = new HashSet<char>
    {
        '+',
        '-',
        '*',
        '/',
        '.',
        ',',
        '=',
        '!',
    };

    /// <summary>Characters emitted as single-character grouping tokens.</summary>
    private static readonly HashSet<char> GROUPING = new HashSet<char>
    {
        '(',
        ')',
    };

    /// <summary>Characters that open (and must close) a string literal.</summary>
    private static readonly HashSet<char> STRING_DELIM = new HashSet<char>
    {
        '"',
        '\'',
    };

    // NOTE(review): because '.' is part of this class and the scan is done one
    // character at a time, input such as "1.2.3" is produced as one Number
    // token; validation is presumably left to whoever consumes the token.
    private static readonly Regex REGEX_NUMBER = new Regex("[.0-9]");

    private static readonly Regex REGEX_IDENTIFIER_START = new Regex("[_a-zA-Z]");

    private static readonly Regex REGEX_IDENTIFIER = new Regex("[_a-zA-Z0-9]");

    /// <summary>Matches any character that terminates an unquoted node path.</summary>
    private static readonly Regex NON_NODE_PATH = new("[^a-zA-Z0-9_\\-\\/\\.\\:]");

    /// <summary>
    /// Reads the body of a string literal whose opening delimiter has already
    /// been consumed, up to and including the closing delimiter.
    /// </summary>
    /// <param name="iterator">Character source, positioned just after the opening delimiter.</param>
    /// <param name="delim">The delimiter that ends the string.</param>
    /// <returns>The string contents with escape sequences resolved, without delimiters.</returns>
    /// <exception cref="InterpreterException">
    /// The input ends before the closing delimiter, or directly after a backslash.
    /// </exception>
    private static string ScanString(CharIterator iterator, char delim = '"')
    {
        var text = new StringBuilder();

        while (iterator.GetNext() != '\0')
        {
            char c = iterator.MoveNext();

            if (c == delim)
            {
                return text.ToString();
            }

            if (c != '\\')
            {
                text.Append(c);
                continue;
            }

            // Backslash: the following character selects the escape.
            char escape = iterator.MoveNext();
            switch (escape)
            {
                case 'n':
                    text.Append('\n');
                    break;
                case 't':
                    text.Append('\t');
                    break;
                case '\0':
                    throw new InterpreterException("Unexpected EOL, " +
                        "expected proper string termination",
                        iterator.Line, iterator.Column);
                default:
                    // Any other escaped character (e.g. a quote or a
                    // backslash) stands for itself.
                    text.Append(escape);
                    break;
            }
        }

        throw new InterpreterException($"Unexpected EOL, expected: {delim}",
            iterator.Line, iterator.Column);
    }

    /// <summary>
    /// Reads a node path whose prefix character has already been consumed.
    /// A path that begins with a string delimiter is read as a quoted string;
    /// otherwise characters are taken until one matches
    /// <see cref="NON_NODE_PATH"/> or the input ends.
    /// </summary>
    /// <param name="iterator">Character source, positioned just after the prefix.</param>
    /// <returns>The node path text (possibly empty).</returns>
    private static string ScanNodePath(CharIterator iterator)
    {
        var path = new StringBuilder();
        bool isAtStart = true;

        while (iterator.GetNext() != '\0')
        {
            char c = iterator.MoveNext();

            if (isAtStart && STRING_DELIM.Contains(c))
            {
                // Quoted form: everything up to the matching delimiter.
                return ScanString(iterator, c);
            }

            if (NON_NODE_PATH.IsMatch(c.ToString()))
            {
                // The terminator belongs to the next token; un-read it.
                iterator.MoveBack();
                return path.ToString();
            }

            isAtStart = false;
            path.Append(c);
        }

        return path.ToString();
    }

    /// <summary>
    /// Consumes characters for as long as each one matches
    /// <paramref name="regex"/>, leaving the first non-matching character
    /// unread.
    /// </summary>
    /// <param name="iterator">Character source.</param>
    /// <param name="regex">Pattern tested against each single character.</param>
    /// <returns>The run of matching characters (possibly empty).</returns>
    private static string ScanRegex(CharIterator iterator, Regex regex)
    {
        var run = new StringBuilder();

        while (iterator.GetNext() != '\0')
        {
            char c = iterator.MoveNext();

            if (!regex.IsMatch(c.ToString()))
            {
                iterator.MoveBack();
                return run.ToString();
            }

            run.Append(c);
        }

        return run.ToString();
    }

    /// <summary>
    /// Lazily tokenizes the input supplied by <paramref name="iterator"/>.
    /// Because this is an iterator method, characters are consumed and errors
    /// are raised only while the returned sequence is being enumerated.
    /// </summary>
    /// <param name="iterator">Character source to read until it reports <c>'\0'</c>.</param>
    /// <returns>The tokens in input order; whitespace produces no token.</returns>
    /// <exception cref="InterpreterException">
    /// A character that starts no known token is encountered, or a string
    /// literal is not terminated.
    /// </exception>
    public IEnumerable<Token> Lex(CharIterator iterator)
    {
        while (iterator.GetNext() != default)
        {
            char c = iterator.MoveNext();

            // Capture the position right after reading the first character so
            // every token reports where it starts, not where its scan ended.
            int line = iterator.Line;
            int col = iterator.Column;

            if (GROUPING.Contains(c))
            {
                yield return new Token(TokenType.Grouping,
                    c.ToString(), line, col);
            }
            else if (OPERATOR.Contains(c))
            {
                // Checked before numbers, so a leading '.' is always an
                // operator and never the start of a number.
                yield return new Token(TokenType.Operator,
                    c.ToString(), line, col);
            }
            else if (c == NODE_PATH_PREFIX)
            {
                yield return new Token(TokenType.NodePath,
                    ScanNodePath(iterator), line, col);
            }
            else if (STRING_DELIM.Contains(c))
            {
                yield return new Token(TokenType.String,
                    ScanString(iterator, c), line, col);
            }
            else if (REGEX_IDENTIFIER_START.IsMatch(c.ToString()))
            {
                yield return new Token(TokenType.Identifier,
                    c + ScanRegex(iterator, REGEX_IDENTIFIER), line, col);
            }
            else if (REGEX_NUMBER.IsMatch(c.ToString()))
            {
                yield return new Token(TokenType.Number,
                    c + ScanRegex(iterator, REGEX_NUMBER), line, col);
            }
            else if (WHITESPACE.Contains(c))
            {
                continue;
            }
            else
            {
                throw new InterpreterException($"Unknown symbol {c}",
                    line, col);
            }
        }
    }
}