334 lines
10 KiB
C#
334 lines
10 KiB
C#
using Qrakhen.Qamp.Core.Logging;
|
|
using System.Text;
|
|
using System.Text.RegularExpressions;
|
|
using static Qrakhen.Qamp.Core.Tokenization.TokenType;
|
|
|
|
namespace Qrakhen.Qamp.Core.Tokenization;
|
|
|
|
/// <summary>
/// Static lookup data for the tokenizer: source-generated regular expressions for
/// numeric literals and the table of reserved words.
/// </summary>
internal static partial class ReaderPatterns
{
    /// <summary>
    /// Maps the text of each reserved word to its <see cref="TokenType"/>.
    /// Lookups use the dictionary's default (case-sensitive) string comparison.
    /// </summary>
    public static readonly Dictionary<string, TokenType> Keywords = new() {
        ["false"] = False,
        ["true"] = True,
        ["null"] = Null,
        ["and"] = And,
        ["else"] = Else,
        ["for"] = For,
        ["if"] = If,
        ["or"] = Or,
        ["this"] = This,
        ["var"] = Var,
        ["while"] = While,
        ["do"] = Do,
        ["typeof"] = TypeOf,
        ["ref"] = Ref,
        ["function"] = Function,
        ["return"] = Return,
        ["class"] = Class,
        ["super"] = Super,
        ["print"] = Print,
        ["import"] = Import,
        ["export"] = Export,
    };

    /// <summary>
    /// Finds an optional run of digits, a dot, and at least one digit.
    /// The pattern is not anchored; group 1 holds the matched text.
    /// </summary>
    [GeneratedRegex(@"((?:\d+)?\.(?:\d+))")]
    public static partial Regex IsDecimal();

    /// <summary>
    /// Finds a run of one or more digits. The pattern is not anchored; group 1 holds the digits.
    /// </summary>
    [GeneratedRegex(@"(\d+)")]
    public static partial Regex IsInteger();

    /// <summary>
    /// Finds "0x" followed by one or more hexadecimal digits. The pattern is not anchored;
    /// group 1 holds the digits without the "0x" prefix.
    /// </summary>
    [GeneratedRegex(@"0x([a-fA-F0-9]+)")]
    public static partial Regex IsHexadecimal();
}
|
|
|
|
public class Reader : IReader<Token>, IDisposable
|
|
{
|
|
/// <summary>
/// Sentinel returned by <see cref="Next"/> and <see cref="Peek"/> when no more input is available.
/// U+FFFF is a Unicode noncharacter, so it cannot be confused with real source text.
/// (U+00FF, by contrast, is an ordinary Latin-1 letter and would end comments and strings early.)
/// </summary>
private const char EofByte = '\uFFFF';

/// <summary>Maximum number of bytes requested from the stream per refill.</summary>
private const int ChunkSize = 0x20;

// Source of the text being tokenized. Length and Position are read, so it must be seekable.
private readonly Stream _stream;

// Created in the constructor; not otherwise used by the code visible in this file.
private readonly Tokens _tokens;

// Every character decoded so far. Characters are appended as chunks are read and never removed.
private readonly List<char> _buffer;

// Not used by the code visible in this file.
private readonly ILogger _logger = LoggerService.Get<Reader>();

// Zero-based line and column of the next character to be consumed.
private int _line = 0;
private int _column = 0;

// Index into _buffer of the next character to be consumed.
private int _current = 0;

// Value of _current and of CurrentPosition at the moment the token being read was started.
private long _startIndex = 0;
private TokenPosition _startPosition = default;

/// <summary>
/// True once the stream position has reached the stream length and every buffered
/// character has been consumed.
/// </summary>
public bool Done =>
    _stream.Position >= _stream.Length &&
    _current >= _buffer.Count;

/// <summary>Line and column of the next character to be consumed.</summary>
public TokenPosition CurrentPosition => new TokenPosition(_line, _column);
|
|
|
|
/// <summary>
/// Creates a reader over <paramref name="stream"/> and rewinds it to the beginning.
/// </summary>
/// <param name="stream">
/// Stream holding the source text. Its position is set and its length is queried,
/// so it has to support seeking.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
public Reader(Stream stream)
{
    // Fail with a descriptive exception instead of a NullReferenceException on the next line.
    ArgumentNullException.ThrowIfNull(stream);

    _stream = stream;
    _stream.Position = 0;
    _tokens = new Tokens();
    _buffer = [];
}
|
|
|
|
/// <summary>
/// Disposes the underlying stream that was passed to the constructor.
/// </summary>
public void Dispose() => _stream.Dispose();
|
|
|
|
/// <summary>
/// Returns the next token from the input.
/// </summary>
/// <param name="includeCompilationIrrelevant">
/// When false (the default), tokens whose <c>IsCompilationIrrelevant</c> flag is set are skipped.
/// When true, the very next token is returned whatever its kind.
/// </param>
/// <returns>
/// The next token, or <c>Token.Eof()</c> when the input is exhausted, including the case
/// where only skipped tokens remained.
/// </returns>
public Token NextToken(bool includeCompilationIrrelevant = false)
{
    if (Done)
        return Token.Eof();

    Token token = ReadToken();
    if (includeCompilationIrrelevant)
        return token;

    // Skip forward until something relevant shows up or the input runs dry.
    while (token.IsCompilationIrrelevant) {
        if (Done)
            return Token.Eof();
        token = ReadToken();
    }

    return token;
}
|
|
|
|
/// <summary>
/// Consumes one character: refills the buffer when it has been drained, advances the
/// read index and the column counter, and returns the character.
/// </summary>
/// <returns>The consumed character, or <c>EofByte</c> when the input is exhausted.</returns>
private char Next()
{
    bool drained = _current >= _buffer.Count;
    if (drained && !Done)
        Continue();

    // Nothing left in either the stream or the buffer: report end of input without advancing.
    if (Done)
        return EofByte;

    _column += 1;
    return _buffer[_current++];
}
|
|
|
|
/// <summary>
/// Returns the character <paramref name="delta"/> positions away from the next unread
/// character without consuming anything.
/// </summary>
/// <param name="delta">
/// Offset relative to the read index; 0 is the next unread character. Negative values look
/// back into already consumed text; an offset before the start of the buffer is not guarded
/// and fails in the list indexer.
/// </param>
/// <returns>The character at that position, or <c>EofByte</c> when the input ends before it.</returns>
private char Peek(int delta = 0)
{
    int index = _current + delta;

    // One refill may add fewer characters than the lookahead needs, so keep refilling
    // until the index is covered or a refill stops adding anything.
    while (index >= _buffer.Count) {
        int buffered = _buffer.Count;
        Continue();
        if (_buffer.Count == buffered)
            break;
    }

    if (index >= _buffer.Count)
        return EofByte;

    return _buffer[index];
}
|
|
|
|
/// <summary>
/// Reads exactly one token starting at the current read index. The first character
/// decides the category: line break, '#' comment, run of spaces, quoted string,
/// number, identifier/keyword, or operator.
/// </summary>
/// <returns>
/// The token that was read. Text that fits no category, and strings without a closing
/// quote, come back as tokens of type <c>Error</c>.
/// </returns>
/// <exception cref="ReaderException">A numeric literal matches none of the number patterns.</exception>
private Token ReadToken()
{
    // Remember where the token begins; MakeToken uses both values.
    _startIndex = _current;
    _startPosition = CurrentPosition;

    char c = Next();
    string buffer = c.ToString();

    if (c == '\n' || c == '\r') {
        // Fold "\r\n" into one token, but leave whatever follows a lone '\r' untouched.
        if (c == '\r' && Check('\n'))
            buffer += Next();
        _line++;
        _column = 0;
        return MakeToken(NewLine, buffer);
    }

    if (c == '#') {
        // The comment extends up to and including the terminating '\n'. Looping on Done
        // rather than on the sentinel keeps the sentinel out of the token text when the
        // comment is the last thing in the input.
        while (!Done) {
            c = Next();
            buffer += c;
            if (c == '\n') {
                // The line break is part of this token, so it has to be counted here.
                _line++;
                _column = 0;
                break;
            }
        }
        return MakeToken(Comment, buffer);
    }

    if (c == ' ') {
        // NOTE(review): only the space character is treated as whitespace; a tab falls
        // through to MakeOperator and becomes an Error token. Confirm this is intended.
        while (Match(' '))
            buffer += ' ';
        return MakeToken(Whitespace, buffer);
    }

    if (c is '\'' or '"') {
        string str = ReadUntil(buffer, c);
        // No closing quote (input ended first): hand the raw text back as an error.
        if (str[^1] != c || str.Length < 2)
            return MakeToken(Error, str);
        // Strip the surrounding quotes; escape sequences are left as written.
        return MakeToken(TokenType.String, str.Substring(1, str.Length - 2));
    }

    if (IsDigit(c) || (c == '.' && IsDigit(Peek())))
        return ReadNumber(buffer);

    if (IsLetter(c)) {
        // NOTE(review): digits are not accepted inside identifiers, so "foo1" is read as
        // the identifier "foo" followed by the integer "1". Confirm this is intended.
        while (IsLetter(Peek()))
            buffer += Next();
        return MakeIdentifier(buffer);
    }

    return MakeOperator(buffer);
}

/// <summary>
/// Reads the remainder of a numeric literal whose first character is already in
/// <paramref name="buffer"/>. Only characters that belong to the literal are consumed:
/// "0x" plus hex digits, or digits with at most one ".digits" fraction. Text such as
/// ".foo" after an integer is therefore left for the following tokens.
/// </summary>
private Token ReadNumber(string buffer)
{
    char first = buffer[0];

    if (first == '0' && Peek() == 'x' && IsHex(Peek(1))) {
        buffer += Next(); // the 'x'
        while (IsHex(Peek()))
            buffer += Next();
    } else {
        // Integer part, or the fraction digits when the literal started with '.'.
        while (IsDigit(Peek()))
            buffer += Next();

        // A dot only belongs to the number when a digit follows it.
        if (first != '.' && Peek() == '.' && IsDigit(Peek(1))) {
            buffer += Next();
            while (IsDigit(Peek()))
                buffer += Next();
        }
    }

    if (ReaderPatterns.IsDecimal().TryMatch(buffer, out Match match))
        return MakeToken(TokenType.Decimal, match.Groups[1].Value);
    if (ReaderPatterns.IsHexadecimal().TryMatch(buffer, out match))
        return MakeToken(Hexadecimal, match.Groups[1].Value);
    if (ReaderPatterns.IsInteger().TryMatch(buffer, out match))
        return MakeToken(Integer, match.Groups[1].Value);

    throw new ReaderException($"Unrecognizable number detected <{buffer}>", this);
}
|
|
|
|
/// <summary>
/// Consumes characters up to and including the first unescaped occurrence of
/// <paramref name="until"/> and returns them appended to <paramref name="buffer"/>.
/// A backslash escapes the character that follows it, so an escaped backslash directly
/// before the terminator does not hide the terminator.
/// </summary>
/// <param name="buffer">Text already read for the current token.</param>
/// <param name="until">Terminating character.</param>
/// <returns>
/// The accumulated text. When the input ends first, the text read so far is returned
/// without a terminator; no exception is raised, the caller checks the last character.
/// </returns>
private string ReadUntil(string buffer, char until)
{
    var text = new StringBuilder(buffer);
    bool escaped = false;

    while (!Done) {
        char c = Next();
        text.Append(c);

        // Line breaks inside the literal still move the reader to the next line.
        if (c == '\n') {
            _line++;
            _column = 0;
        }

        if (escaped)
            escaped = false;
        else if (c == '\\')
            escaped = true;
        else if (c == until)
            break;
    }

    return text.ToString();
}
|
|
|
|
/// <summary>
/// Builds the token for a word: the keyword's own token type when the word is listed in
/// <c>ReaderPatterns.Keywords</c>, otherwise <c>Identifier</c>.
/// </summary>
/// <param name="buffer">The word that was read.</param>
private Token MakeIdentifier(string buffer)
{
    TokenType type = ReaderPatterns.Keywords.TryGetValue(buffer, out TokenType keyword)
        ? keyword
        : Identifier;
    return MakeToken(type, buffer);
}
|
|
|
|
/// <summary>
/// Tests whether the next unread character equals <paramref name="c"/>; nothing is consumed.
/// </summary>
private bool Check(char c)
{
    char upcoming = Peek();
    return upcoming == c;
}
|
|
|
|
/// <summary>
/// Consumes the next character when it equals <paramref name="c"/>.
/// </summary>
/// <returns>True when the character matched and was consumed; otherwise false.</returns>
private bool Match(char c)
{
    bool matched = Check(c);
    if (matched)
        Next();
    return matched;
}
|
|
|
|
/// <summary>
/// Tests whether <paramref name="sequence"/> is spelled by the characters already held in
/// <paramref name="buffer"/> followed by the upcoming input. On success the upcoming
/// characters that complete the sequence are consumed and a token of <paramref name="type"/>
/// is produced; on failure nothing is consumed.
/// </summary>
/// <param name="sequence">Characters to match, in order.</param>
/// <param name="buffer">Characters already consumed for the current token.</param>
/// <param name="type">Token type to emit on success.</param>
/// <param name="token">The produced token on success; <c>Token.Void</c> otherwise.</param>
/// <returns>True when the whole sequence matched.</returns>
private bool MatchSequence(char[] sequence, string buffer, TokenType type, out Token token)
{
    token = Token.Void;

    // Verify first, using lookahead only, so a failed match leaves the reader untouched.
    for (int i = 0; i < sequence.Length; i++) {
        char c = i < buffer.Length ? buffer[i] : Peek(i - buffer.Length);
        if (sequence[i] != c)
            return false;
    }

    // Consume the part of the sequence that was only peeked, so the token text and the
    // span computed by MakeToken both cover it. Characters already in buffer are not repeated.
    var text = new StringBuilder(buffer);
    for (int i = buffer.Length; i < sequence.Length; i++)
        text.Append(Next());

    token = MakeToken(type, text.ToString());
    return true;
}
|
|
|
|
/// <summary>
/// True for the ASCII letters a-z and A-Z and for the underscore.
/// </summary>
private bool IsLetter(char c) => c == '_' || char.IsAsciiLetter(c);
|
|
|
|
/// <summary>
/// True for the ASCII digits 0-9.
/// </summary>
private bool IsDigit(char c) => char.IsAsciiDigit(c);
|
|
|
|
/// <summary>
/// True for the hexadecimal digits 0-9, a-f and A-F.
/// </summary>
private bool IsHex(char c) => char.IsAsciiHexDigit(c);
|
|
|
|
/// <summary>
/// Builds the token for punctuation and operators. The first character of
/// <paramref name="buffer"/> selects the operator; where a two-character form exists,
/// the next input character is checked and consumed when it completes that form.
/// </summary>
/// <param name="buffer">Text read so far for the current token; its first character is examined.</param>
/// <returns>
/// The operator token, or an <c>Error</c> token when the character is not a known operator
/// (no exception is raised for unknown characters).
/// </returns>
private Token MakeOperator(string buffer)
{
    // Token made of the text read so far.
    Token Emit(TokenType type) => MakeToken(type, buffer);

    // Token made of the text read so far plus the next character, which is consumed.
    Token EmitWithNext(TokenType type) => MakeToken(type, buffer + Next());

    switch (buffer[0]) {
        case '[': return Emit(ArrayOpen);
        case ']': return Emit(ArrayClose);
        case '{': return Emit(ContextOpen);
        case '}': return Emit(ContextClose);
        case '(': return Emit(GroupOpen);
        case ')': return Emit(GroupClose);
        case '.': return Emit(Dot);
        case ',': return Emit(Comma);
        case ';': return Emit(Semicolon);
        case ':': return Emit(Colon);
        case '^': return Emit(BitwiseXor);
        case '~': return Emit(BitwiseNot);

        case '&':
            if (Check('&'))
                return EmitWithNext(And);
            return Emit(BitwiseAnd);

        case '%':
            if (Check('='))
                return EmitWithNext(ModuloEqual);
            return Emit(Modulo);

        case '|':
            if (Check('|'))
                return EmitWithNext(Or);
            return Emit(BitwiseOr);

        case '!':
            if (Check('='))
                return EmitWithNext(BangEqual);
            return Emit(Bang);

        case '+':
            if (Check('+'))
                return EmitWithNext(Increment);
            if (Check('='))
                return EmitWithNext(PlusEqual);
            return Emit(Plus);

        case '-':
            if (Check('-'))
                return EmitWithNext(Decrement);
            if (Check('='))
                return EmitWithNext(MinusEqual);
            return Emit(Minus);

        case '/':
            if (Check('='))
                return EmitWithNext(SlashEqual);
            return Emit(Slash);

        case '*':
            if (Check('='))
                return EmitWithNext(StarEqual);
            return Emit(Star);

        case '=':
            if (Check('='))
                return EmitWithNext(EqualEqual);
            return Emit(Equal);

        case '<':
            if (Check('<'))
                return EmitWithNext(BitwiseLeft);
            if (Check('='))
                return EmitWithNext(LessEqual);
            return Emit(Less);

        case '>':
            if (Check('>'))
                return EmitWithNext(BitwiseRight);
            if (Check('='))
                return EmitWithNext(GreaterEqual);
            return Emit(Greater);

        case '?':
            if (Check('?'))
                return EmitWithNext(DoubleQuestion);
            return Emit(Question);

        default:
            return Emit(Error);
    }
}
|
|
|
|
/// <summary>
/// Creates a token of the given type and text, positioned at the start recorded by
/// <c>ReadToken</c> and spanning from that start index to the current read index.
/// </summary>
/// <param name="type">Token type.</param>
/// <param name="buffer">Token text.</param>
private Token MakeToken(TokenType type, string buffer)
{
    // Start index and length are measured in positions of the character buffer.
    var span = new StreamSpan(_startIndex, _current - _startIndex);
    return new Token(type, buffer, _startPosition, span);
}
|
|
|
|
/// <summary>
/// Reads the next chunk of at most <c>ChunkSize</c> bytes from the stream, decodes it as
/// UTF-8 and appends the characters to the buffer. Does nothing when the reader is done
/// or the stream has no bytes left.
/// </summary>
private void Continue()
{
    if (Done)
        return;

    long remaining = _stream.Length - _stream.Position;
    int size = remaining > ChunkSize ? ChunkSize : (int)remaining;
    if (size <= 0)
        return;

    // Three spare bytes leave room to finish a UTF-8 sequence cut off at the chunk edge.
    byte[] bytes = new byte[size + 3];
    int read = _stream.Read(bytes, 0, size);
    if (read <= 0)
        return;

    // Decoding each chunk on its own would corrupt a multi-byte character split across
    // two chunks, so pull in the continuation bytes that are still missing.
    for (int missing = MissingContinuationBytes(bytes, read); missing > 0; missing--) {
        int next = _stream.ReadByte();
        if (next < 0)
            break;
        if ((next & 0xC0) != 0x80) {
            // Malformed input: this byte starts something else, leave it for the next chunk.
            _stream.Position--;
            break;
        }
        bytes[read++] = (byte)next;
    }

    // Decode only the bytes actually obtained, never the unused tail of the array.
    _buffer.AddRange(Encoding.UTF8.GetChars(bytes, 0, read));
}

/// <summary>
/// Returns how many continuation bytes are still needed to complete the UTF-8 sequence
/// at the end of the first <paramref name="count"/> bytes, or 0 when the data ends on a
/// complete (or malformed) sequence.
/// </summary>
private static int MissingContinuationBytes(byte[] bytes, int count)
{
    // Step back over at most three continuation bytes (10xxxxxx) to reach the lead byte.
    int trailing = 0;
    int i = count - 1;
    while (i >= 0 && trailing < 3 && (bytes[i] & 0xC0) == 0x80) {
        trailing++;
        i--;
    }
    if (i < 0)
        return 0;

    // The lead byte's high bits state how many continuation bytes follow it.
    byte lead = bytes[i];
    int expected =
        (lead & 0xE0) == 0xC0 ? 1 :
        (lead & 0xF0) == 0xE0 ? 2 :
        (lead & 0xF8) == 0xF0 ? 3 : 0;

    return expected > trailing ? expected - trailing : 0;
}
|
|
|
|
/// <summary>
/// Immutable value holding a single start offset. Not used by the code visible in this file.
/// </summary>
private readonly struct CaptureHandle
{
    /// <summary>The start offset supplied at construction.</summary>
    public readonly long Start;

    public CaptureHandle(long start)
    {
        Start = start;
    }
}
|
|
}
|