371 lines
12 KiB
C#
371 lines
12 KiB
C#
using Qrakhen.Qamp.Core.Collections.Abstractions;
|
|
using Qrakhen.Qamp.Core.Logging;
|
|
using System.Text;
|
|
using System.Text.RegularExpressions;
|
|
using static Qrakhen.Qamp.Core.Tokenization.TokenType;
|
|
|
|
namespace Qrakhen.Qamp.Core.Tokenization;
|
|
|
|
/// <summary>
/// Static lookup data for the tokenizer: number-literal patterns plus the
/// keyword table (and the aliases that map alternative spellings onto the
/// first spelling registered for the same <see cref="TokenType"/>).
/// </summary>
internal static partial class ReaderPatterns
{
    /// <summary>
    /// Decimal literal with an optional integer part, e.g. <c>1.5</c> or <c>.5</c>.
    /// Group 1 holds the literal. The pattern is unanchored.
    /// </summary>
    [GeneratedRegex(@"((?:\d+)?\.(?:\d+))")]
    public static partial Regex IsDecimal();

    /// <summary>
    /// Run of decimal digits. Group 1 holds the digits. The pattern is unanchored.
    /// </summary>
    [GeneratedRegex(@"(\d+)")]
    public static partial Regex IsInteger();

    /// <summary>
    /// <c>0x</c>-prefixed hexadecimal literal. Group 1 holds the digits without
    /// the prefix. The pattern is unanchored.
    /// </summary>
    [GeneratedRegex(@"0x([a-fA-F0-9]+)")]
    public static partial Regex IsHexadecimal();

    /// <summary>Every registered spelling mapped to its token type.</summary>
    public static readonly Dictionary<string, TokenType> Keywords = new();

    /// <summary>
    /// Alternative spellings mapped to the first spelling that was registered
    /// for the same token type.
    /// </summary>
    public static readonly Dictionary<string, string> Aliases = new();

    // First spelling registered per token type. Kept explicitly so that alias
    // resolution neither rescans Keywords nor depends on dictionary enumeration order.
    private static readonly Dictionary<TokenType, string> CanonicalSpellings = new();

    /// <summary>
    /// Registers <paramref name="key"/> as a spelling of <paramref name="type"/>.
    /// When the type already has a spelling, <paramref name="key"/> is also
    /// recorded as an alias of that first spelling.
    /// </summary>
    private static void Define(string key, TokenType type)
    {
        // not ideal, but momentarily better than setting up complex dialects
        if (CanonicalSpellings.TryGetValue(type, out var canonical))
            Aliases[key] = canonical;
        else
            CanonicalSpellings[type] = key;

        Keywords[key] = type;
    }

    static ReaderPatterns()
    {
        // Literals
        Define("false", False);
        Define("true", True);
        Define("null", Null);
        Define("void", Null);

        // Logic and control flow
        Define("and", And);
        Define("else", Else);
        Define("for", For);
        Define("if", If);
        Define("or", Or);

        // Declarations and references
        Define("this", This);
        Define(".~", This);
        Define("var", Var);
        Define("*~", Var);
        Define("while", While);
        Define("do", Do);
        Define("ref", Ref);

        // Functions
        Define("function", Function);
        Define("funqtion", Function);
        Define("fq", Function);
        Define("funq", Function);
        Define("return", Return);
        Define("<:", Return);

        // Classes and types
        Define("class", Class);
        Define("base", Base);
        Define("^~", Base);
        Define("typeof", TypeOf);
        Define("?:", TypeOf);

        // Printing / diagnostics
        Define("print", Print);
        Define("::", Print);
        Define("globals", PrintGlobals);
        Define("stack", PrintStack);
        Define("expr", PrintExpr);

        // Modules
        Define("import", Import);
        Define("export", Export);
    }
}
|
|
|
|
/// <summary>
/// Tokenizer that reads UTF-8 text from a stream in small chunks and turns it
/// into <see cref="Token"/>s, tracking line/column positions as it goes.
/// </summary>
public class Reader : IReader<Token>, IDisposable
{
    // Sentinel returned by Next/Peek when no character is available. It is a
    // valid character (U+00FF), so end-of-input decisions use Done instead of
    // comparing against it.
    private const char EofByte = (char)0xFF;

    // Number of bytes requested from the stream per refill.
    private const int ChunkSize = 0x20;

    private readonly Stream _stream;
    private readonly Tokens _tokens;

    // Every character decoded so far; _current indexes into it.
    private readonly List<char> _buffer;

    // Stateful decoder: keeps the leading bytes of a multi-byte UTF-8 sequence
    // that was split across two chunks until the remainder arrives.
    private readonly Decoder _decoder = Encoding.UTF8.GetDecoder();

    private readonly ILogger _logger = LoggerService.Get<Reader>();

    private int _line = 0;
    private int _column = 0;
    private int _current = 0;
    private long _startIndex = 0;
    private TokenPosition _startPosition = default;

    /// <summary>
    /// True once the stream is exhausted and every buffered character has been consumed.
    /// </summary>
    public bool Done => _stream.Position >= _stream.Length &&
                        _current >= _buffer.Count;

    /// <summary>Line and column of the next character to be consumed.</summary>
    public TokenPosition CurrentPosition => new TokenPosition(_line, _column);

    /// <summary>
    /// Creates a reader over <paramref name="stream"/> and rewinds it to position 0.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    public Reader(Stream stream)
    {
        ArgumentNullException.ThrowIfNull(stream);

        _stream = stream;
        _stream.Position = 0;
        _tokens = new Tokens();
        _buffer = [];
    }

    /// <summary>Disposes the underlying stream.</summary>
    public void Dispose()
    {
        _stream.Dispose();
    }

    /// <summary>
    /// Returns the next token, or an EOF token when the input is exhausted.
    /// </summary>
    /// <param name="includeCompilationIrrelevant">
    /// When false (default), tokens flagged <c>IsCompilationIrrelevant</c> are skipped.
    /// </param>
    public Token NextToken(bool includeCompilationIrrelevant = false)
    {
        if (Done)
            return Token.Eof();

        Token token = ReadToken();
        if (includeCompilationIrrelevant)
            return token;

        while (token.IsCompilationIrrelevant) {
            // Input ended on an irrelevant token: report EOF instead of it.
            if (Done)
                return Token.Eof();
            token = ReadToken();
        }

        return token;
    }

    /// <summary>
    /// Consumes and returns the next character, or <see cref="EofByte"/> when
    /// none is left. Position tracking lives here so that newlines consumed
    /// inside comments and string literals advance the line counter too.
    /// </summary>
    private char Next()
    {
        while (_current >= _buffer.Count && Continue()) { }

        if (_current >= _buffer.Count)
            return EofByte;

        char c = _buffer[_current++];
        if (c == '\n')
            AdvanceLine();
        else
            _column++;
        return c;
    }

    /// <summary>
    /// Returns the character <paramref name="delta"/> positions away from the
    /// next unread one without consuming anything, or <see cref="EofByte"/>
    /// when that position does not exist.
    /// </summary>
    private char Peek(int delta = 0)
    {
        int index = _current + delta;

        while (index >= _buffer.Count && Continue()) { }

        if (index < 0 || index >= _buffer.Count)
            return EofByte;
        return _buffer[index];
    }

    // Moves the tracked position to the start of the following line.
    private void AdvanceLine()
    {
        _line++;
        _column = 0;
    }

    /// <summary>
    /// Reads exactly one token starting at the current position. Callers must
    /// ensure the reader is not <see cref="Done"/>.
    /// </summary>
    private Token ReadToken()
    {
        _startIndex = _current;
        _startPosition = CurrentPosition;

        char c = Next();
        string buffer = c.ToString();

        if (c is '\n' or '\r')
            return ReadNewLine(c, buffer);

        if (c == '#')
            return ReadComment(buffer);

        if (c == ' ') {
            while (Match(' '))
                buffer += ' ';
            return MakeToken(Whitespace, buffer);
        }

        if (c is '\'' or '"') {
            string str = ReadUntil(buffer, c, out bool terminated);
            if (!terminated)
                return MakeToken(Error, str);
            // Strip the surrounding quotes; escape sequences are left untouched.
            return MakeToken(TokenType.String, str.Substring(1, str.Length - 2));
        }

        if (IsDigit(c) || (c == '.' && IsDigit(Peek())))
            return ReadNumber(buffer);

        if (IsLetter(c)) {
            // NOTE(review): identifiers are letters/underscore only, so `foo1`
            // lexes as `foo` followed by `1` — confirm this is intended.
            while (IsLetter(Peek()))
                buffer += Next();
            return MakeIdentifier(buffer);
        }

        return MakeOperator(buffer);
    }

    // Completes a NewLine token whose first character (\n or \r) is already consumed.
    private Token ReadNewLine(char first, string buffer)
    {
        if (first == '\r') {
            if (Check('\n'))
                buffer += Next();   // CRLF: consuming the \n advances the line
            else
                AdvanceLine();      // lone CR still ends the line; keep the next char
        }

        return MakeToken(NewLine, buffer);
    }

    // Reads a '#' comment up to and including the terminating newline, or to end of input.
    private Token ReadComment(string buffer)
    {
        var text = new StringBuilder(buffer);

        while (!Done) {
            char c = Next();
            text.Append(c);
            if (c == '\n')
                break;
        }

        return MakeToken(Comment, text.ToString());
    }

    /// <summary>
    /// Reads the rest of a numeric literal and classifies it as decimal,
    /// hexadecimal or integer.
    /// </summary>
    /// <exception cref="ReaderException">None of the number patterns matched.</exception>
    private Token ReadNumber(string buffer)
    {
        while (IsNumberPart(Peek()))
            buffer += Next();

        // NOTE(review): the patterns are unanchored, so characters that were
        // consumed above but fall outside the match (presumably e.g. the `abc`
        // in `12abc`) are dropped from the token text — confirm that leniency.
        if (ReaderPatterns.IsDecimal().TryMatch(buffer, out Match match))
            return MakeToken(TokenType.Decimal, match.Groups[1].Value);
        if (ReaderPatterns.IsHexadecimal().TryMatch(buffer, out match))
            return MakeToken(Hexadecimal, match.Groups[1].Value);
        if (ReaderPatterns.IsInteger().TryMatch(buffer, out match))
            return MakeToken(Integer, match.Groups[1].Value);

        throw new ReaderException($"Unrecognizable number detected <{buffer}>", this);
    }

    /// <summary>
    /// Appends characters to <paramref name="buffer"/> until an unescaped
    /// <paramref name="until"/> has been consumed or the input ends.
    /// </summary>
    /// <param name="terminated">
    /// True when the terminator was found; false when the input ended first.
    /// </param>
    /// <returns>The buffer including everything consumed (terminator included).</returns>
    private string ReadUntil(string buffer, char until, out bool terminated)
    {
        var text = new StringBuilder(buffer);
        bool escaped = false;
        terminated = false;

        while (!Done) {
            char c = Next();
            text.Append(c);

            if (escaped) {
                // Character after a backslash is taken literally.
                escaped = false;
            } else if (c == '\\') {
                escaped = true;
            } else if (c == until) {
                terminated = true;
                break;
            }
        }

        return text.ToString();
    }

    // Keyword lookup: known spellings get their keyword type, anything else is an Identifier.
    private Token MakeIdentifier(string buffer)
    {
        return ReaderPatterns.Keywords.TryGetValue(buffer, out TokenType type)
            ? MakeToken(type, buffer)
            : MakeToken(Identifier, buffer);
    }

    // True when the next unread character equals c (nothing is consumed).
    private bool Check(char c) => Peek(0) == c;

    // Consumes the next character only if it equals c.
    private bool Match(char c)
    {
        if (!Check(c))
            return false;

        Next();
        return true;
    }

    /// <summary>
    /// Tries to match <paramref name="sequence"/> against the already consumed
    /// <paramref name="buffer"/> followed by the upcoming input. On success the
    /// upcoming part is consumed and <paramref name="token"/> is produced.
    /// </summary>
    private bool MatchSequence(char[] sequence, string buffer, TokenType type, out Token token)
    {
        token = Token.Void;

        for (int i = 0; i < sequence.Length; i++) {
            char c = i < buffer.Length ? buffer[i] : Peek(i - buffer.Length);
            if (sequence[i] != c)
                return false;
        }

        // Consume only the part that was peeked, so the token span covers it.
        var matched = new StringBuilder(buffer);
        for (int i = buffer.Length; i < sequence.Length; i++)
            matched.Append(Next());

        token = MakeToken(type, matched.ToString());
        return true;
    }

    private static bool IsLetter(char c) => c is >= 'a' and <= 'z' or >= 'A' and <= 'Z' or '_';

    private static bool IsDigit(char c) => c is >= '0' and <= '9';

    private static bool IsHex(char c) => IsDigit(c) || c is >= 'a' and <= 'f' or >= 'A' and <= 'F';

    // Characters accepted while scanning a numeric literal.
    private static bool IsNumberPart(char c) => IsHex(c) || c is 'x' or '.';

    /// <summary>
    /// Builds an operator token from <paramref name="buffer"/>, whose first
    /// character is already consumed. When the next character matches one of
    /// <paramref name="pairs"/> (checked in order), it is consumed and that
    /// pair's type is used; otherwise <paramref name="single"/> is used.
    /// </summary>
    private Token MakeCompound(string buffer, TokenType single, params (char Second, TokenType Type)[] pairs)
    {
        char upcoming = Peek();

        foreach (var (second, type) in pairs) {
            if (upcoming == second)
                return MakeToken(type, buffer + Next());
        }

        return MakeToken(single, buffer);
    }

    /// <summary>
    /// Maps the punctuation character in <paramref name="buffer"/> (plus an
    /// optional second character) to its token; unknown characters yield an
    /// Error token.
    /// </summary>
    private Token MakeOperator(string buffer)
    {
        return buffer[0] switch {
            '[' => MakeToken(ArrayOpen, buffer),
            ']' => MakeToken(ArrayClose, buffer),
            '{' => MakeToken(ContextOpen, buffer),
            '}' => MakeToken(ContextClose, buffer),
            '(' => MakeToken(GroupOpen, buffer),
            ')' => MakeToken(GroupClose, buffer),
            ',' => MakeToken(Comma, buffer),
            ';' => MakeToken(Semicolon, buffer),
            '~' => MakeToken(BitwiseNot, buffer),
            '.' => MakeCompound(buffer, Dot, ('~', This)),
            ':' => MakeCompound(buffer, Colon, (':', Print)),
            '&' => MakeCompound(buffer, BitwiseAnd, ('&', And)),
            '^' => MakeCompound(buffer, BitwiseXor, ('~', Base)),
            '%' => MakeCompound(buffer, Modulo, ('=', ModuloEqual)),
            '|' => MakeCompound(buffer, BitwiseOr, ('|', Or)),
            '!' => MakeCompound(buffer, Bang, ('=', BangEqual)),
            '+' => MakeCompound(buffer, Plus, ('+', Increment), ('=', PlusEqual)),
            '-' => MakeCompound(buffer, Minus, ('-', Decrement), ('=', MinusEqual)),
            '/' => MakeCompound(buffer, Slash, ('=', SlashEqual)),
            '*' => MakeCompound(buffer, Star, ('=', StarEqual), ('~', Var)),
            '=' => MakeCompound(buffer, Equal, ('=', EqualEqual)),
            '<' => MakeCompound(buffer, Less,
                ('~', Equal), (':', Return), ('<', BitwiseLeft), ('=', LessEqual)),
            '>' => MakeCompound(buffer, Greater, ('>', BitwiseRight), ('=', GreaterEqual)),
            '?' => MakeCompound(buffer, Question, ('?', DoubleQuestion), (':', TypeOf)),
            _ => MakeToken(Error, buffer) //throw new ReaderException($"Could not identify operator <{buffer}>", this)
        };
    }

    // Wraps the text in a Token positioned at the start of the current token and
    // spanning everything consumed since then.
    private Token MakeToken(TokenType type, string buffer)
    {
        return new Token(
            type,
            buffer,
            _startPosition,
            new StreamSpan(_startIndex, _current - _startIndex));
    }

    /// <summary>
    /// Reads the next chunk of at most <see cref="ChunkSize"/> bytes from the
    /// stream and appends the decoded characters to the buffer.
    /// </summary>
    /// <returns>True when bytes were read; false when the stream had nothing left.</returns>
    private bool Continue()
    {
        long remaining = _stream.Length - _stream.Position;
        if (remaining <= 0)
            return false;

        byte[] bytes = new byte[Math.Min(remaining, ChunkSize)];
        int read = _stream.Read(bytes, 0, bytes.Length);
        if (read <= 0)
            return false;

        // Decode only what was actually read. Flush on the final chunk so an
        // incomplete trailing sequence is surfaced rather than silently held back.
        bool flush = _stream.Position >= _stream.Length;
        char[] chars = new char[_decoder.GetCharCount(bytes, 0, read, flush)];
        _decoder.GetChars(bytes, 0, read, chars, 0, flush);
        _buffer.AddRange(chars);
        return true;
    }

    private readonly struct CaptureHandle(long start)
    {
        public readonly long Start = start;
    }
}
|