qamp/Qrakhen.Qamp.Core/Tokenization/Reader.cs

373 lines
12 KiB
C#

using Qrakhen.Qamp.Core.Collections.Abstractions;
using Qrakhen.Qamp.Core.Logging;
using System.Text;
using System.Text.RegularExpressions;
using static Qrakhen.Qamp.Core.Tokenization.TokenType;
namespace Qrakhen.Qamp.Core.Tokenization;
/// <summary>
/// Static lookup data used by the reader: the numeric literal patterns and the
/// keyword / alias tables.
/// </summary>
internal static partial class ReaderPatterns
{
    /// <summary>
    /// Pattern for a decimal literal with an optional integer part (e.g. <c>1.5</c> or <c>.5</c>).
    /// The pattern is not anchored; capture group 1 holds the literal.
    /// </summary>
    [GeneratedRegex(@"((?:\d+)?\.(?:\d+))")]
    public static partial Regex IsDecimal();

    /// <summary>
    /// Pattern for a run of digits. The pattern is not anchored; capture group 1 holds the digits.
    /// </summary>
    [GeneratedRegex(@"(\d+)")]
    public static partial Regex IsInteger();

    /// <summary>
    /// Pattern for a <c>0x</c>-prefixed hexadecimal literal. The pattern is not anchored;
    /// capture group 1 holds the digits without the prefix.
    /// </summary>
    [GeneratedRegex(@"0x([a-fA-F0-9]+)")]
    public static partial Regex IsHexadecimal();

    /// <summary>Maps every known spelling (word or symbol) to its token type.</summary>
    public static readonly Dictionary<string, TokenType> Keywords = new();

    /// <summary>
    /// Maps every additional spelling of a token type to the spelling that was registered first for it.
    /// </summary>
    public static readonly Dictionary<string, string> Aliases = new();

    /// <summary>
    /// Registers <paramref name="key"/> as a spelling of <paramref name="type"/>. When the type
    /// already has a spelling, the new one is additionally recorded as an alias of the earliest one.
    /// </summary>
    private static void Define(string key, TokenType type)
    {
        // A linear scan per registration is not ideal, but for now it beats
        // setting up complex dialects.
        foreach (KeyValuePair<string, TokenType> known in Keywords) {
            if (known.Value == type) {
                Aliases[key] = known.Key;
                break;
            }
        }
        Keywords[key] = type;
    }

    /// <summary>
    /// Fills the tables. The order matters: the first spelling listed for a type is the one
    /// all later spellings are aliased to.
    /// </summary>
    static ReaderPatterns()
    {
        (string, TokenType)[] definitions = [
            ("false", False),
            ("true", True),
            ("null", Null),
            ("void", Null),
            ("and", And),
            ("else", Else),
            ("for", For),
            ("if", If),
            ("or", Or),
            ("this", This),
            (".~", This),
            ("var", Var),
            ("*~", Var),
            ("while", While),
            ("do", Do),
            ("ref", Ref),
            ("function", Function),
            ("funqtion", Function),
            ("fq", Function),
            ("funq", Function),
            ("return", Return),
            ("<:", Return),
            ("class", Class),
            ("base", Base),
            ("^~", Base),
            ("typeof", TypeOf),
            ("?:", TypeOf),
            ("print", Print),
            ("::", Print),
            ("globals", PrintGlobals),
            ("stack", PrintStack),
            ("expr", PrintExpr),
            ("import", Import),
            ("export", Export),
        ];

        foreach ((string key, TokenType type) in definitions)
            Define(key, type);
    }
}
public class Reader : IReader<Token>, IDisposable
{
// Sentinel character handed out by Next() and Peek() when no further character is available.
private const char EofByte = (char)0xFF;
// Upper bound, in bytes, for a single read from the stream in Continue().
private const int ChunkSize = 0x20;
// Source of the text; rewound in the constructor and disposed in Dispose().
private readonly Stream _stream;
// Created in the constructor; not used by any other member visible in this file.
private readonly Tokens _tokens;
// Every character decoded from the stream so far; Continue() appends to it.
private readonly List<char> _buffer;
// Logger for this type; not used by any member visible in this file.
private readonly ILogger _logger = LoggerService.Get<Reader>();
// Zero-based line counter, advanced when a NewLine token is read.
private int _line = 0;
// Column counter: incremented by Next() for every character handed out, reset on a new line.
private int _column = 0;
// Index into _buffer of the next character to hand out.
private int _current = 0;
// Value of _current at the start of the token being read (set in ReadToken, used in MakeToken).
private long _startIndex = 0;
// Line/column at the start of the token being read (set in ReadToken, used in MakeToken).
private TokenPosition _startPosition = default;
/// <summary>
/// True once the stream has been read to its end and every buffered character has been consumed.
/// </summary>
public bool Done
{
    get {
        bool streamExhausted = _stream.Position >= _stream.Length;
        return streamExhausted && _current >= _buffer.Count;
    }
}
/// <summary>Line and column the reader is currently at.</summary>
public TokenPosition CurrentPosition
{
    get { return new(_line, _column); }
}
/// <summary>
/// Creates a reader over <paramref name="stream"/> and rewinds the stream to its beginning.
/// </summary>
/// <param name="stream">
/// Text source. Its <see cref="Stream.Position"/> is set here and its <see cref="Stream.Length"/>
/// is queried while reading, so it has to support seeking. The reader disposes it in
/// <see cref="Dispose"/>.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
public Reader(Stream stream)
{
    // Fail with a descriptive exception instead of a NullReferenceException on the next line.
    _stream = stream ?? throw new ArgumentNullException(nameof(stream));
    _stream.Position = 0;
    _tokens = new Tokens();
    _buffer = [];
}
/// <summary>Disposes the underlying stream.</summary>
public void Dispose() => _stream.Dispose();
/// <summary>
/// Returns the next token, or an end-of-file token once the input is exhausted.
/// </summary>
/// <param name="includeCompilationIrrelevant">
/// When false (the default), tokens flagged as compilation-irrelevant are skipped.
/// When true, every token is returned as read.
/// </param>
/// <returns>The next token, or <c>Token.Eof()</c> when nothing relevant is left.</returns>
public Token NextToken(bool includeCompilationIrrelevant = false)
{
    if (Done)
        return Token.Eof();

    Token token = ReadToken();
    if (includeCompilationIrrelevant)
        return token;

    // Skip irrelevant tokens; running out of input while skipping means end of file.
    while (token.IsCompilationIrrelevant) {
        if (Done)
            return Token.Eof();
        token = ReadToken();
    }
    return token;
}
/// <summary>
/// Consumes and returns the next character, loading another chunk from the stream when the
/// buffer has run dry. Returns <c>EofByte</c> when the input is exhausted.
/// </summary>
private char Next()
{
    bool bufferDrained = _current >= _buffer.Count;
    if (bufferDrained && !Done)
        Continue();

    if (Done)
        return EofByte;

    _column++;
    return _buffer[_current++];
}
/// <summary>
/// Returns the character <paramref name="delta"/> positions away from the read position
/// without consuming anything. Loads one more chunk if the requested index is not buffered yet.
/// </summary>
/// <param name="delta">Offset relative to the next unread character; 0 is that character itself.</param>
/// <returns>The character at that offset, or <c>EofByte</c> if it lies beyond the available input.</returns>
private char Peek(int delta = 0)
{
    int index = _current + delta;
    if (index >= _buffer.Count)
        Continue();

    return index < _buffer.Count ? _buffer[index] : EofByte;
}
/// <summary>
/// Reads exactly one token starting at the current read position. The first character decides
/// the category: line break, <c>#</c> comment, run of spaces, quoted string, number,
/// identifier/keyword, or (for everything else) operator.
/// </summary>
/// <returns>The token that was read; unrecognized input yields a token of type <c>Error</c>.</returns>
/// <exception cref="ReaderException">
/// A run that started like a number matched none of the numeric patterns.
/// </exception>
private Token ReadToken()
{
    _startIndex = _current;
    _startPosition = CurrentPosition;

    char c = Next();
    string buffer = c.ToString();

    if (c is '\n' or '\r') {
        // Fold "\r\n" into one token. A lone '\r' must not swallow whatever follows it.
        if (c == '\r' && Check('\n'))
            buffer += Next();
        _line++;
        _column = 0;
        return MakeToken(NewLine, buffer);
    }

    if (c == '#') {
        // The comment runs up to and including the line break. Loop on Done rather than on the
        // EofByte sentinel so the sentinel never ends up in the comment text.
        while (!Done) {
            c = Next();
            buffer += c;
            if (c == '\n') {
                // The line break is consumed as part of the comment, so account for it here.
                _line++;
                _column = 0;
                break;
            }
        }
        return MakeToken(Comment, buffer);
    }

    if (c == ' ') {
        while (Match(' '))
            buffer += ' ';
        return MakeToken(Whitespace, buffer);
    }

    if (c is '\'' or '"') {
        string str = ReadUntil(buffer, c);
        // No closing quote (input ended first), or nothing but the opening quote: report an error.
        if (str[^1] != c || str.Length < 2)
            return MakeToken(Error, str);
        // Strip the surrounding quotes.
        return MakeToken(TokenType.String, str.Substring(1, str.Length - 2));
    }

    if (IsDigit(c) || (c == '.' && IsDigit(Peek()))) {
        // Greedily collect everything that could belong to a decimal, integer or 0x literal,
        // then let the patterns decide what it is.
        while (true) {
            c = Peek();
            if (!(IsDigit(c) || IsHex(c) || c == 'x' || c == '.'))
                break;
            buffer += Next();
        }

        // NOTE(review): the patterns are unanchored and only group 1 becomes the token text, so
        // presumably any part of the collected run outside the match is dropped - confirm intent.
        if (ReaderPatterns.IsDecimal().TryMatch(buffer, out Match match))
            return MakeToken(TokenType.Decimal, match.Groups[1].Value);
        if (ReaderPatterns.IsHexadecimal().TryMatch(buffer, out match))
            return MakeToken(Hexadecimal, match.Groups[1].Value);
        if (ReaderPatterns.IsInteger().TryMatch(buffer, out match))
            return MakeToken(Integer, match.Groups[1].Value);
        throw new ReaderException($"Unrecognizable number detected <{buffer}>", this);
    }

    if (IsLetter(c)) {
        while (IsLetter(Peek()))
            buffer += Next();
        return MakeIdentifier(buffer);
    }

    return MakeOperator(buffer);
}
/// <summary>
/// Appends characters to <paramref name="buffer"/> until an unescaped <paramref name="until"/>
/// character has been consumed or the input ends.
/// </summary>
/// <param name="buffer">Text read so far (the opening delimiter); the result starts with it.</param>
/// <param name="until">Closing delimiter. A delimiter preceded by an unescaped backslash does not close.</param>
/// <returns>
/// The accumulated text including the closing delimiter. If the input ends first, the text read
/// so far is returned without a closing delimiter, which lets the caller emit an error token
/// instead of throwing.
/// </returns>
private string ReadUntil(string buffer, char until)
{
    // True when the previous character was a backslash that is not itself escaped.
    // Tracking this (instead of looking one character back) makes "\\" followed by the
    // delimiter terminate correctly.
    bool escaped = false;

    // Loop on Done rather than on the EofByte sentinel so a literal U+00FF in the text
    // is not mistaken for the end of the input.
    while (!Done) {
        char c = Next();
        buffer += c;

        if (c == '\n') {
            // Keep line/column bookkeeping correct for text spanning several lines.
            _line++;
            _column = 0;
        }

        if (escaped) {
            escaped = false;
        } else if (c == '\\') {
            escaped = true;
        } else if (c == until) {
            break;
        }
    }
    return buffer;
}
/// <summary>
/// Builds the token for a word: the matching keyword type if the word is listed in
/// <c>ReaderPatterns.Keywords</c>, otherwise a plain identifier.
/// </summary>
private Token MakeIdentifier(string buffer)
{
    TokenType resolved = ReaderPatterns.Keywords.TryGetValue(buffer, out TokenType keyword)
        ? keyword
        : Identifier;
    return MakeToken(resolved, buffer);
}
/// <summary>Tells whether the next unread character equals <paramref name="c"/>, without consuming it.</summary>
private bool Check(char c)
{
    return Peek() == c;
}
/// <summary>
/// Consumes the next character if it equals <paramref name="c"/>.
/// </summary>
/// <returns>True when the character matched and was consumed; false when nothing was consumed.</returns>
private bool Match(char c)
{
    bool matched = Check(c);
    if (matched)
        Next();
    return matched;
}
/// <summary>
/// Tests whether the text already read (<paramref name="buffer"/>) followed by the upcoming
/// input spells out <paramref name="sequence"/>. On success the not-yet-consumed part of the
/// sequence is consumed and a token of <paramref name="type"/> is produced.
/// </summary>
/// <param name="sequence">Characters that have to appear, in order.</param>
/// <param name="buffer">Characters of the current token that were already consumed.</param>
/// <param name="type">Token type to create on a match.</param>
/// <param name="token">The created token on success; <c>Token.Void</c> otherwise.</param>
/// <returns>True on a match; false (with nothing consumed) otherwise.</returns>
private bool MatchSequence(char[] sequence, string buffer, TokenType type, out Token token)
{
    token = Token.Void;

    // Compare first, without consuming, so a mismatch leaves the read position untouched.
    for (int i = 0; i < sequence.Length; i++) {
        char c = i < buffer.Length ? buffer[i] : Peek(i - buffer.Length);
        if (sequence[i] != c)
            return false;
    }

    // Consume only what was peeked. The characters in `buffer` are already part of the text
    // and must not be appended a second time.
    string text = buffer;
    for (int i = buffer.Length; i < sequence.Length; i++)
        text += Next();

    token = MakeToken(type, text);
    return true;
}
/// <summary>True for ASCII letters (a-z, A-Z) and the underscore.</summary>
private bool IsLetter(char c)
{
    return c == '_' || char.IsAsciiLetter(c);
}
/// <summary>True for the ASCII digits 0-9.</summary>
private bool IsDigit(char c)
{
    return char.IsAsciiDigit(c);
}
/// <summary>True for ASCII hexadecimal digits (0-9, a-f, A-F).</summary>
private bool IsHex(char c)
{
    return char.IsAsciiHexDigit(c);
}
/// <summary>
/// Builds the token for an operator or punctuation character. <paramref name="buffer"/> holds
/// the already consumed first character; where a two-character operator exists, the following
/// character is inspected and consumed on a match.
/// </summary>
/// <param name="buffer">Text of the token so far; only its first character selects the operator.</param>
/// <returns>The operator token, or a token of type <c>Error</c> for an unknown character.</returns>
private Token MakeOperator(string buffer)
{
    // Tries the (follow-up character, type) candidates in the given order and consumes the
    // follow-up character of the first candidate that matches. Falls back to the
    // single-character type when none matches.
    Token Compound(TokenType single, params (char, TokenType)[] candidates)
    {
        foreach ((char follow, TokenType combined) in candidates) {
            if (Match(follow))
                return MakeToken(combined, buffer + follow);
        }
        return MakeToken(single, buffer);
    }

    switch (buffer[0]) {
        case '[': return MakeToken(ArrayOpen, buffer);
        case ']': return MakeToken(ArrayClose, buffer);
        case '{': return MakeToken(ContextOpen, buffer);
        case '}': return MakeToken(ContextClose, buffer);
        case '(': return MakeToken(GroupOpen, buffer);
        case ')': return MakeToken(GroupClose, buffer);
        case '.': return Compound(Dot, ('~', This));
        case ',': return MakeToken(Comma, buffer);
        case ';': return MakeToken(Semicolon, buffer);
        case ':': return Compound(Colon, (':', Print));
        case '&': return Compound(BitwiseAnd, ('&', And));
        case '^': return Compound(BitwiseXor, ('~', Base));
        case '%': return Compound(Modulo, ('=', ModuloEqual));
        case '|': return Compound(BitwiseOr, ('|', Or));
        case '!': return Compound(Bang, ('=', BangEqual));
        case '+': return Compound(Plus, ('+', Increment), ('=', PlusEqual));
        case '-': return Compound(Minus, ('-', Decrement), ('=', MinusEqual));
        case '/': return Compound(Slash, ('=', SlashEqual));
        case '*': return Compound(Star, ('=', StarEqual), ('~', Var));
        case '=': return Compound(Equal, ('=', EqualEqual));
        case '<':
            return Compound(
                Less,
                ('~', Equal),
                (':', Return),
                ('+', ArrayAdd),
                ('<', BitwiseLeft),
                ('=', LessEqual));
        case '>': return Compound(Greater, ('>', BitwiseRight), ('=', GreaterEqual));
        case '~': return MakeToken(BitwiseNot, buffer);
        case '?': return Compound(Question, ('?', DoubleQuestion), (':', TypeOf));
        default:
            // Unknown characters are reported as an error token rather than by throwing.
            return MakeToken(Error, buffer);
    }
}
/// <summary>
/// Creates a token of <paramref name="type"/> with the text <paramref name="buffer"/>. The token
/// is positioned at the start recorded for the current token and spans from that start index
/// up to the current read index.
/// </summary>
private Token MakeToken(TokenType type, string buffer)
{
    var span = new StreamSpan(_startIndex, _current - _startIndex);
    return new Token(type, buffer, _startPosition, span);
}
// Stateful UTF-8 decoder. Unlike a one-shot Encoding.UTF8.GetChars call it remembers the leading
// bytes of a multi-byte character that is cut off at the end of one chunk and completes the
// character with the next chunk.
private readonly Decoder _decoder = Encoding.UTF8.GetDecoder();

/// <summary>
/// Reads further input from the stream, in chunks of at most <c>ChunkSize</c> bytes, decodes it
/// as UTF-8 and appends the characters to the buffer. Returns as soon as at least one character
/// was added, or when the stream has no more data. Does nothing if the stream is already at
/// its end.
/// </summary>
private void Continue()
{
    byte[] bytes = new byte[ChunkSize];
    char[] chars = new char[Encoding.UTF8.GetMaxCharCount(ChunkSize)];

    while (_stream.Position < _stream.Length) {
        int wanted = (int)Math.Min(ChunkSize, _stream.Length - _stream.Position);

        // Read may deliver fewer bytes than requested; only the bytes actually read are decoded.
        int read = _stream.Read(bytes, 0, wanted);
        if (read <= 0)
            return;

        // Flush on the final chunk so a truncated trailing sequence is still emitted
        // instead of being held back by the decoder.
        bool lastChunk = _stream.Position >= _stream.Length;
        int produced = _decoder.GetChars(bytes, 0, read, chars, 0, lastChunk);

        for (int i = 0; i < produced; i++)
            _buffer.Add(chars[i]);

        // A chunk that held only part of a multi-byte character yields nothing yet; keep reading.
        if (produced > 0)
            return;
    }
}
/// <summary>Immutable holder for a single start index.</summary>
private readonly struct CaptureHandle
{
    /// <summary>The start index supplied at construction.</summary>
    public readonly long Start;

    public CaptureHandle(long start)
    {
        Start = start;
    }
}
}