qamp/Qrakhen.Qamp.Core/Compilation/Digester.cs

878 lines
24 KiB
C#

using Qrakhen.Qamp.Core.Collections;
using Qrakhen.Qamp.Core.Execution;
using Qrakhen.Qamp.Core.Values;
using T = Qrakhen.Qamp.Core.Tokenization.TokenType;
using Op = Qrakhen.Qamp.Core.Execution.OpCode;
using String = Qrakhen.Qamp.Core.Values.Objects.String;
using Qrakhen.Qamp.Core.Compilation.Builders;
using Qrakhen.Qamp.Core.Values.Objects;
using Qrakhen.Qamp.Core.Tokenization;
using Qrakhen.Qamp.Core.Logging;
using Qrakhen.Qamp.Core.Collections.Abstractions;
namespace Qrakhen.Qamp.Core.Compilation;
/// <summary>
/// A named local variable tracked by a <c>Builder</c> while compiling.
/// </summary>
/// <param name="Name">Identifier of the local.</param>
/// <param name="Depth">
/// Scope depth the local belongs to. Locals are pushed with -1 by <c>Digester.AddLocal</c>
/// and receive the current scope depth in <c>Digester.InitializeLocal</c>.
/// </param>
/// <param name="IsCaptured">
/// Set to true by <c>Digester.ResolveOuter</c> when an inner function resolves this local;
/// <c>Digester.EndScope</c> emits <c>CloseOuter</c> instead of <c>Pop</c> for such locals.
/// </param>
public record struct Local(string Name, int Depth = -1, bool IsCaptured = false);
/// <summary>
/// Describes a variable that a function pulls in from an enclosing builder.
/// </summary>
/// <param name="Index">
/// Slot index; produced by <c>Digester.ResolveLocal</c> (when <paramref name="IsLocal"/> is true)
/// or by a recursive <c>Digester.ResolveOuter</c> call (when it is false).
/// </param>
/// <param name="IsLocal">
/// True when <paramref name="Index"/> points at a local of the directly enclosing builder,
/// false when it points at one of that builder's own outers.
/// </param>
public record struct Outer(long Index, bool IsLocal);
/// <summary>
/// Kind of code unit a <c>Builder</c> is compiling. <c>Code</c> is what the
/// <c>Digester</c> constructor uses for its initial, outermost builder.
/// </summary>
public enum FunctionType { Function, Constructor, Method, Code }
/// <summary>
/// Bundle of parser/compiler state (token reader, current and previous token,
/// error position, active builder). The members mirror those declared on
/// <see cref="Digester"/>.
/// </summary>
/// <remarks>
/// NOTE(review): no constructor or initializer for <see cref="Reader"/> or
/// <see cref="Builder"/> is visible here — confirm where they are assigned before
/// relying on this type.
/// </remarks>
internal class CompilerState
{
// Token source; the derived properties below (CurrentPosition, Done) read from it.
public readonly IReader<Token> Reader;
// Token currently being looked at.
public Token Current;
// Token that was current before the last advance.
public Token Previous;
// Position of the most recently reported error; TokenPosition.None means no error.
public TokenPosition ErrorPosition = TokenPosition.None;
public TokenPosition CurrentPosition => Reader.CurrentPosition;
// True once ErrorPosition has been set to something other than None.
public bool HadError => !ErrorPosition.IsNone;
public bool Done => Reader.Done;
public bool ThrowOnError { get; set; } = true;
// Builder of the function currently being compiled.
public Builder Builder;
public FunctionBuilder Function => Builder.Function;
// Number of instruction bytes written to the current function's segment so far.
public long CurrentInstruction => Function.Segment.Instructions.Count;
}
/// <summary>
/// Holder for digester instances. Only a single untyped slot is declared so far.
/// </summary>
/// <remarks>NOTE(review): appears to be an unfinished placeholder — confirm intended use.</remarks>
public class DigesterProvider
{
// Untyped slot for an expression digester; nothing in this file reads or writes it.
public object ExpressionDigester; // etc.
}
/// <summary>
/// Empty abstract base type; it declares no members and nothing in this file derives from it.
/// </summary>
/// <remarks>NOTE(review): appears to be an unfinished placeholder — confirm intended use.</remarks>
public abstract class xDigester
{
}
public class Digester : ISteppable<Token>
{
// NOTE(review): never assigned or read anywhere in the visible part of this class.
private readonly CompilerState _compiler;
/// <summary>Raised by <c>ReportError</c> when <see cref="ThrowOnError"/> is false.</summary>
public event Action<Token, string>? Error;
private readonly ILogger _logger = LoggerService.Get<Digester>();
// Token source; advanced exclusively through Next().
private readonly IReader<Token> _reader;
/// <summary>Function builder of the builder that is currently active.</summary>
internal FunctionBuilder Function => Builder.Function;
/// <summary>Token currently being looked at (the look-ahead).</summary>
internal Token Current;
/// <summary>Token that was <see cref="Current"/> before the last call to <c>Next</c>.</summary>
internal Token Previous;
/// <summary>Active builder; swapped by <c>CreateBuilder</c> and <c>FinishBuilder</c>.</summary>
public Builder Builder { get; private set; }
/// <summary>Builder of the class currently being declared, or null outside of <c>DeclareClass</c>.</summary>
public ClassBuilder? ClassBuilder { get; private set; }
/// <summary>When true (the default), <c>ReportError</c> throws instead of raising <see cref="Error"/>.</summary>
public bool ThrowOnError { get; set; } = true;
/// <summary>True once an error position other than <c>TokenPosition.None</c> has been recorded.</summary>
public bool HadError => !ErrorPosition.IsNone;
public bool Done => _reader.Done;
/// <summary>Number of instruction bytes written to the current function's segment so far.</summary>
public long CurrentInstruction => Function.Segment.Instructions.Count;
public TokenPosition CurrentPosition => _reader.CurrentPosition;
/// <summary>Position of the token passed to the most recent <c>ReportError</c> call.</summary>
public TokenPosition ErrorPosition { get; private set; } = TokenPosition.None;
/// <summary>
/// Creates a digester that pulls its tokens from <paramref name="reader"/> and
/// starts out with a builder of type <see cref="FunctionType.Code"/>.
/// </summary>
/// <param name="reader">Token source that <see cref="Next"/> reads from.</param>
public Digester(IReader<Token> reader)
{
    _reader = reader;

    // CreateBuilder already installs the new builder; the assignment is kept so the
    // property is visibly initialised by the constructor.
    Builder = CreateBuilder(FunctionType.Code);
}
/// <summary>
/// Compiles the whole token stream: primes the look-ahead, processes declarations
/// and statements until the reader is done or an Eof token is matched, and then
/// finishes the active builder.
/// </summary>
/// <returns>The function built by <see cref="FinishBuilder"/> for the active builder.</returns>
/// <remarks>
/// A stall guard aborts the loop when the reader position did not change during the
/// previous pass. That can happen when errors are reported without throwing
/// (<see cref="ThrowOnError"/> is false) and no token gets consumed.
/// </remarks>
public Function Digest()
{
#if LOG
    _logger.Method();
#endif
    // Reader position seen at the start of the previous pass (initially: before the
    // first token was pulled).
    TokenPosition previousPosition = _reader.CurrentPosition;
    Next();
    while (!Done && !Match(T.Eof)) {
#if LOG
        Token token = Current;
        _logger.Verbose($"Digesting {token}");
#endif
        if (previousPosition == _reader.CurrentPosition) {
            ErrorAtCurrent("Loop detected / could not digest further");
            break;
        }
        // Refresh the marker every pass; without this the guard would only ever
        // compare against the initial position and could never detect a stall.
        previousPosition = _reader.CurrentPosition;
        Process();
    }
#if LOG
    // Must be captured before FinishBuilder, which switches Builder to its outer builder.
    SegmentBuilder _debug = Builder.Function.Segment;
#endif
    Function function = FinishBuilder();
#if LOG
    _logger.Debug($"Digest done, instruction set:");
#endif
#if LOG
    _logger.Verbose(_debug.Debug());
#endif
    return function;
}
/// <summary>
/// Creates a builder of the given <paramref name="type"/> nested inside the currently
/// active builder and makes it the active one.
/// </summary>
/// <param name="type">Kind of code unit the new builder compiles.</param>
/// <returns>The newly created builder, which is now <see cref="Builder"/>.</returns>
internal Builder CreateBuilder(FunctionType type)
{
    var created = new Builder(type, Builder);

    // Anything but top-level code takes its name from the token consumed last.
    if (type != FunctionType.Code) {
        created.Function.Name = Previous.Value!;
    }

    // The first local slot is reserved: unnamed for plain functions, "this" otherwise.
    string reservedName = type == FunctionType.Function ? "" : "this";
    created.Locals.Push(new Local(reservedName, 0));

    Builder = created;
    return created;
}
/// <summary>
/// Closes the active builder: emits the implicit return, builds the function and
/// switches <see cref="Builder"/> back to the enclosing builder.
/// </summary>
/// <returns>The function built from the builder that was active on entry.</returns>
internal Function FinishBuilder()
{
    EmitReturn();

    var finished = Builder.Function.Build();

    // Step back out to whatever builder enclosed the one just finished.
    Builder = Builder.Outer;

    return finished;
}
/// <summary>
/// Compiles one declaration (class, function or variable) or, if the current token
/// starts none of those, one statement.
/// </summary>
internal void Process()
{
#if LOG
    _logger.Method();
#endif
    if (Match(T.Class)) {
        DeclareClass();
        return;
    }
    if (Match(T.Function)) {
        DeclareFunction();
        return;
    }
    if (Match(T.Var)) {
        DeclareVariable();
        return;
    }
    Statement();
}
/// <summary>
/// Compiles one statement. The leading keyword token is consumed here and the
/// matching handler is invoked; anything that is not a known statement keyword is
/// compiled as an expression statement.
/// </summary>
internal void Statement()
{
#if LOG
    _logger.Method();
#endif
    // The checks run in this fixed order; the first keyword that matches wins.
    if (Match(T.If)) { If(); return; }
    if (Match(T.Else)) { Else(); return; }
    if (Match(T.While)) { While(); return; }
    if (Match(T.Do)) { Do(); return; }
    if (Match(T.For)) { For(); return; }
    if (Match(T.Return)) { Return(); return; }
    if (Match(T.TypeOf)) { TypeOf(); return; }
    if (Match(T.Print)) { Print(); return; }
    if (Match(T.PrintStack)) { PrintStack(); return; }
    if (Match(T.PrintGlobals)) { PrintGlobals(); return; }
    if (Match(T.PrintExpr)) { PrintExpr(); return; }
    if (Match(T.Export)) { Export(); return; }
    if (Match(T.Import)) { Import(); return; }
    if (Match(T.ContextOpen)) { Context(); return; }

    Expression();
}
/// <summary>
/// Compiles an expression statement.
/// </summary>
/// <remarks>
/// In top-level code, an expression directly followed by Eof (no ';') is printed
/// instead of discarded, on the assumption that the user wants to see its value.
/// Otherwise a ';' is required and the value is popped so it does not linger on the stack.
/// </remarks>
internal void Expression()
{
#if LOG
    _logger.Method();
#endif
    ParseExpression();

    bool printInsteadOfPop = Builder.Type == FunctionType.Code && Check(T.Eof);
    if (!printInsteadOfPop) {
        Consume(T.Semicolon, "Expected ';' after statement");
        // The value of an expression statement must not stay on the stack.
        // Suppressing the push altogether was considered but would break forms
        // such as `return a = b;`.
        Emit(Op.Pop);
        return;
    }

    Emit(Op.Print);
}
/// <summary>
/// Parses a full expression, i.e. everything with a weight of at least <c>Weight.Assign</c>.
/// </summary>
internal void ParseExpression()
{
    WeightedDigest(Weight.Assign);
}
/// <summary>
/// Compiles a braced block in its own scope. The opening token has already been
/// consumed by the caller.
/// </summary>
internal void Context()
{
#if LOG
    _logger.Method();
#endif
    BeginScope();
    {
        Block();
    }
    EndScope();
}
/// <summary>
/// Processes declarations and statements until the closing context token (or Eof)
/// is reached, then consumes the closing token.
/// </summary>
internal void Block()
{
#if LOG
    _logger.Method();
#endif
    while (!(Check(T.ContextClose) || Check(T.Eof))) {
        Process();
    }

    Consume(T.ContextClose, "Expected '}' after context");
}
/// <summary>
/// Precedence-driven expression parser: consumes one token, runs its prefix rule and
/// then keeps consuming infix operators for as long as their weight is at least
/// <paramref name="precedence"/>.
/// </summary>
/// <param name="precedence">Lowest operator weight this call is allowed to consume.</param>
/// <remarks>
/// Errors are reported at <see cref="Previous"/>, because that is the offending token
/// in both cases: the prefix rule is looked up from <c>Previous.Type</c>, and a stray
/// '=' has already been moved into <see cref="Previous"/> by <see cref="Match"/>.
/// </remarks>
internal void WeightedDigest(Weight precedence)
{
#if LOG
    _logger.Method();
#endif
    Next();
    Rule rule = ExpressionParser.Get(Previous.Type);
    if (rule.Prefix == null) {
        // The token that cannot start an expression is the one just consumed.
        ErrorAtPrevious("Expected expression");
        return;
    }

    // Assignment is only legal when parsing at assignment weight or lower.
    bool canAssign = precedence <= Weight.Assign;
    rule.Prefix(this, canAssign);

    while (precedence <= ExpressionParser.Get(Current.Type).Weight) {
        Next();
        ExpressionParser.Get(Previous.Type).Infix?.Invoke(this, canAssign);
    }

    // A '=' still pending here was not consumed by any rule, so its left-hand side
    // is not something that can be assigned to.
    if (canAssign && Match(T.Equal))
        ErrorAtPrevious("Invalid assignment target.");
}
/// <summary>
/// Compiles a class declaration: name, optional base class after ':' and the braced
/// list of members. The class keyword has already been consumed by the caller.
/// </summary>
/// <remarks>
/// While the body is compiled, <see cref="ClassBuilder"/> points at a builder for this
/// class; the previous value is restored at the end, which allows nesting.
/// </remarks>
internal void DeclareClass()
{
#if LOG
_logger.Method();
#endif
Consume(T.Identifier, "Expected a class name.");
// we need both a global name constant and a local declaration for this class
string name = Previous.Value!;
long identifier = IdentifierConstant(name);
DeclareLocal(name);
EmitDynamic(Op.Class, identifier);
DefineVariable(identifier);
// Push a class builder; the one that was active before becomes its Outer.
ClassBuilder = new ClassBuilder {
Outer = ClassBuilder,
Name = name,
IsDerived = false
};
if (Match(T.Colon))
{
Consume(T.Identifier, "Expected base class name.", false);
// Compile a read of the base class identifier that was just consumed.
ExpressionParser.Variable(this, false);
if (name == Previous.Value!)
ErrorAtPrevious($"Class {name} can not inherit from itself!");
// Add base reference by injecting a synthetic base variable into a virtual scope
BeginScope();
AddLocal("base");
DefineVariable(0);
ExpressionParser.Variable(this, name, false);
// NOTE(review): nothing is emitted here after the class reference is loaded —
// confirm whether an instruction linking the class to its base is expected at this point.
ClassBuilder.IsDerived = true;
}
// Load the class again so it is available while members are declared;
// it is removed by the Pop emitted after the body.
ExpressionParser.Variable(this, name, false);
Consume(T.ContextOpen, "Expected '{' for class body declaration.", false);
while (!Check(T.ContextClose) && !Check(T.Eof))
DeclareMember();
Consume(T.ContextClose, "Expected '}' to end class body declaration.", false);
Emit(Op.Pop);
// Close the virtual scope that was opened for the synthetic "base" local.
if (ClassBuilder.IsDerived)
EndScope();
ClassBuilder = ClassBuilder.Outer;
}
/// <summary>
/// Compiles a named function declaration. The function keyword has already been
/// consumed by the caller.
/// </summary>
internal void DeclareFunction()
{
#if LOG
    _logger.Method();
#endif
    long nameIndex = ParseVariable();

    // Initialise the name before the body is compiled, so the body can already refer to it.
    InitializeLocal();

    CreateFunction(FunctionType.Function);
    DefineVariable(nameIndex);
}
/// <summary>
/// Compiles one class member. Only methods are supported at the moment; a member
/// named like the enclosing class is compiled as a constructor.
/// </summary>
internal void DeclareMember()
{
    // todo: only methods are allowed for now; fields and other member kinds are still missing
#if LOG
    _logger.Method();
#endif
    Consume(T.Identifier, "Expected method name.", false);

    var memberName = Previous.Value;
    long identifier = IdentifierConstant(memberName);

    FunctionType type = memberName == ClassBuilder?.Name
        ? FunctionType.Constructor
        : FunctionType.Method;

    CreateFunction(type);
    EmitDynamic(Op.Method, identifier);
}
/// <summary>
/// Compiles a variable declaration with an optional initializer. Without an
/// initializer the variable starts out as null. The var keyword has already been
/// consumed by the caller.
/// </summary>
internal void DeclareVariable()
{
#if LOG
    _logger.Method();
#endif
    long nameIndex = ParseVariable();

    if (!Match(T.Equal)) {
        Emit(Op.Null);
    } else {
        ParseExpression();
    }

    Consume(T.Semicolon, "missing ; after variable declaration");
    DefineVariable(nameIndex);
}
/// <summary>
/// Placeholder for the export statement; not implemented yet.
/// </summary>
/// <exception cref="NotImplementedException">Always thrown.</exception>
internal void Export()
{
#if LOG
    _logger.Method();
#endif
    // The message used to say "Import" (copied from Import()); it now names the right feature.
    throw new NotImplementedException($"Export is not yet implemented. Sorry, it's difficult.");
}
/// <summary>
/// Placeholder for the import statement; not implemented yet.
/// </summary>
/// <exception cref="NotImplementedException">Always thrown.</exception>
internal void Import()
{
#if LOG
    _logger.Method();
#endif
    const string notice = "Import is not yet implemented. Sorry, it's difficult.";
    throw new NotImplementedException(notice);
}
/// <summary>
/// Compiles an if statement with an optional else branch. The if keyword has already
/// been consumed by the caller.
/// </summary>
/// <remarks>
/// Both branches begin with a Pop that discards the condition value, which the
/// conditional jump leaves on the stack.
/// </remarks>
internal void If()
{
#if LOG
    _logger.Method();
#endif
    Consume(T.GroupOpen, "Expected '(' after if");
    ParseExpression();
    Consume(T.GroupClose, "Expected ')' after condition");

    // Condition false: skip the then-branch.
    long skipThen = EmitJump(Op.JumpIfFalse);
    Emit(Op.Pop);
    Statement();

    // End of then-branch: skip the else-branch.
    long skipElse = EmitJump(Op.Jump);

    PatchJump(skipThen);
    Emit(Op.Pop);
    if (Match(T.Else)) {
        Statement();
    }

    PatchJump(skipElse);
}
/// <summary>
/// Handles an else keyword found at statement position, i.e. one that does not
/// follow an if: reports an error at the else token.
/// </summary>
internal void Else()
{
#if LOG
    _logger.Method();
#endif
    const string complaint = "This 'else' is very alone :( Have you meant to place it after an 'if'?";
    ErrorAtPrevious(complaint);
}
/// <summary>
/// Compiles a while loop. The while keyword has already been consumed by the caller.
/// </summary>
internal void While()
{
#if LOG
    _logger.Method();
#endif
    // The condition is re-evaluated on every pass, so the loop jumps back to here.
    long conditionStart = Function.CurrentInstruction;

    Consume(T.GroupOpen, "Expected while condition to be placed inside parentheses. Missing '('.", false);
    ParseExpression();
    Consume(T.GroupClose, "Expected while condition to be placed inside parentheses. Missing ')'.", false);

    long leaveLoop = EmitJump(Op.JumpIfFalse);
    Emit(Op.Pop); // discard the condition value before the body runs
    Statement();
    EmitLoop(conditionStart);

    PatchJump(leaveLoop);
    Emit(Op.Pop); // discard the condition value on the exit path
}
/// <summary>
/// Placeholder for the do statement; not implemented yet.
/// </summary>
/// <exception cref="NotImplementedException">Always thrown.</exception>
internal void Do()
{
#if LOG
    _logger.Method();
#endif
    // todo: implement as a while loop with synthetic counter variables that are checked at the end
    throw new NotImplementedException();
}
/// <summary>
/// Compiles a for loop with optional initializer, condition and increment clauses.
/// The for keyword has already been consumed by the caller.
/// </summary>
/// <remarks>
/// The increment clause appears before the body in the source but must run after it.
/// It is therefore compiled first and jumped over; the body then loops back to the
/// increment, which in turn loops back to the condition.
/// </remarks>
internal void For()
{
#if LOG
    _logger.Method();
#endif
    // The whole loop lives in its own scope so an initializer variable stays local to it.
    BeginScope();
    Consume(T.GroupOpen, "Expected '(' after for statement");

    // Initializer clause: empty, a variable declaration or an expression statement.
    if (!Match(T.Semicolon)) {
        if (Match(T.Var)) {
            DeclareVariable();
        } else {
            Expression();
        }
    }

    long loopStart = Function.CurrentInstruction;

    // Condition clause; -1 marks "no condition, hence no exit jump".
    long exitJump = -1;
    if (!Match(T.Semicolon)) {
        ParseExpression();
        Consume(T.Semicolon, "Expected ';' after condition");
        exitJump = EmitJump(Op.JumpIfFalse);
        Emit(Op.Pop);
    }

    // Increment clause.
    if (!Match(T.GroupClose)) {
        long skipIncrement = EmitJump(Op.Jump);
        long incrementStart = Function.Segment.Instructions.Count;
        ParseExpression();
        Emit(Op.Pop);
        Consume(T.GroupClose, "Expected ')' after for clause");
        EmitLoop(loopStart);
        // From now on the body loops back to the increment rather than the condition.
        loopStart = incrementStart;
        PatchJump(skipIncrement);
    }

    Statement();
    EmitLoop(loopStart);

    if (exitJump >= 0) {
        PatchJump(exitJump);
        Emit(Op.Pop);
    }

    EndScope();
}
/// <summary>
/// Compiles a return statement. Returning is rejected in top-level code, and
/// returning a value is rejected inside constructors. The return keyword has already
/// been consumed by the caller.
/// </summary>
internal void Return()
{
#if LOG
    _logger.Method();
#endif
    if (Builder.Type == FunctionType.Code) {
        ErrorAtPrevious($"Interesting approach, but that won't work.");
        return;
    }

    // Bare `return;` — emit the implicit return sequence.
    if (Match(T.Semicolon)) {
        EmitReturn();
        return;
    }

    // Everything below is `return <expression>;`.
    if (Builder.Type == FunctionType.Constructor) {
        ErrorAtPrevious($"We're not returning from a constructor where I'm from.");
        return;
    }

    ParseExpression();
    Consume(T.Semicolon, $"Expected ';' after return.");
    Emit(Op.Return);
}
/// <summary>
/// Compiles a print statement: an expression, a terminating ';' and the Print
/// instruction. The print keyword has already been consumed by the caller.
/// </summary>
internal void Print()
{
#if LOG
    _logger.Method();
#endif
    ParseExpression();
    Consume(T.Semicolon, "Expected ';' after print call.");

    Emit(Op.Print);
}
/// <summary>
/// Compiles the print-globals statement: expects a terminating ';' and emits the
/// PrintGlobals instruction. The keyword has already been consumed by the caller.
/// </summary>
internal void PrintGlobals()
{
#if LOG
    _logger.Method();
#endif
    Consume(T.Semicolon, "Expected ';' after print call.");

    Emit(Op.PrintGlobals);
}
/// <summary>
/// Compiles the print-stack statement: expects a terminating ';' and emits the
/// PrintStack instruction. The keyword has already been consumed by the caller.
/// </summary>
internal void PrintStack()
{
#if LOG
    _logger.Method();
#endif
    Consume(T.Semicolon, "Expected ';' after print call.");

    Emit(Op.PrintStack);
}
/// <summary>
/// Compiles the print-expr statement: expects a terminating ';' and emits the
/// PrintExpr instruction. No expression is parsed by this method. The keyword has
/// already been consumed by the caller.
/// </summary>
internal void PrintExpr()
{
#if LOG
    _logger.Method();
#endif
    Consume(T.Semicolon, "Expected ';' after print call.");

    Emit(Op.PrintExpr);
}
/// <summary>
/// Compiles a typeof statement: parses the operand expression and emits the Typeof
/// instruction. No terminating ';' is consumed here. The keyword has already been
/// consumed by the caller.
/// </summary>
internal void TypeOf()
{
#if LOG
    _logger.Method();
#endif
    ParseExpression();

    Emit(Op.Typeof);
}
/// <summary>
/// Enters a new lexical scope by raising the active builder's scope depth by one.
/// </summary>
internal void BeginScope()
{
#if LOG
    _logger.Method();
#endif
    Builder.ScopeDepth += 1;
}
/// <summary>
/// Leaves the current lexical scope: lowers the scope depth by one and discards every
/// local that is deeper than the new depth. For each discarded local, CloseOuter is
/// emitted if it was captured and Pop otherwise.
/// </summary>
internal void EndScope()
{
#if LOG
    _logger.Method();
#endif
    Builder.ScopeDepth--;

    while (Builder.Locals.Count > 0) {
        var top = Builder.Locals.Peek();

        // Stop at the first local that belongs to a scope which is still open.
        if (top.Depth <= Builder.ScopeDepth) {
            break;
        }

        if (top.IsCaptured) {
            Emit(Op.CloseOuter);
        } else {
            Emit(Op.Pop);
        }

        Builder.Locals.Pop();
    }
}
/// <summary>
/// Compiles a function's parameter list and body in a fresh builder and then emits,
/// into the enclosing function, the instruction sequence that references it: the
/// Context instruction, the constant index of the built function and one
/// (flag, index) pair for each of its outers.
/// </summary>
/// <param name="type">Kind of function being compiled; forwarded to <see cref="CreateBuilder"/>.</param>
internal void CreateFunction(FunctionType type)
{
#if LOG
_logger.Method();
#endif
// Kept in a local because Builder points at the enclosing builder again after FinishBuilder().
Builder next = CreateBuilder(type);
// This scope is not closed with EndScope(); the builder is finished as a whole instead.
BeginScope();
Consume(T.GroupOpen, "Expected '(' to start argument list.");
if (!Check(T.GroupClose)) {
do {
Builder.Function.ArgumentCount++;
if (Builder.Function.ArgumentCount > 0xFF)
ErrorAtCurrent($"In the name of the lord, how many arguments do you need?");
// Every parameter becomes an initialised local of the new function.
DefineVariable(ParseVariable());
} while (Match(T.Comma));
}
Consume(T.GroupClose, "Expected ')' after argument list.");
Consume(T.ContextOpen, "Expected function body");
Block();
Function function = FinishBuilder();
// From here on everything is emitted into the enclosing function.
Emit(Op.Context);
EmitDynamic(MakeConstant(Obj.Create(function)));
for (int i = 0; i < function.OuterCount; i++) {
Outer outer = next.Outers.Get(i);
// NOTE(review): the conditional below has type int, so this presumably binds to
// Emit(int) and writes four bytes rather than one — confirm this matches what the VM reads.
Emit(outer.IsLocal ? 1 : 0);
EmitDynamic(outer.Index);
}
}
/// <summary>
/// Looks up a local variable by name in the given builder.
/// </summary>
/// <param name="builder">Builder whose locals are searched.</param>
/// <param name="name">Name of the variable to find.</param>
/// <returns>
/// Index of the most recently declared local with that name, or -1 if there is none.
/// </returns>
/// <remarks>
/// The search runs from the newest local down to slot 0, so a local declared in an
/// inner scope shadows an equally named one from an enclosing scope. Searching upwards
/// from slot 0 would always return the oldest declaration.
/// </remarks>
internal long ResolveLocal(Builder builder, string name)
{
    for (int i = (int)builder.Locals.Count - 1; i >= 0; i--) {
        if (builder.Locals[i].Name == name)
            return i;
    }
    return -1;
}
/// <summary>
/// Pushes a new, not yet initialised local (depth -1) onto the active builder.
/// </summary>
/// <param name="name">Name of the local; must not be null or empty.</param>
/// <exception cref="TokenException">Thrown when <paramref name="name"/> is null or empty.</exception>
internal void AddLocal(string? name)
{
#if LOG
    _logger.Method(name);
#endif
    var pending = new Local(ThrowIfEmpty(name), -1);
    Builder.Locals.Push(pending);
}
/// <summary>
/// Declares a new local in the current scope. Does nothing at scope depth 0, where
/// variables are not tracked as locals. Reports an error if a local with the same
/// name already exists in the current scope.
/// </summary>
/// <param name="name">Name of the local; must not be null or empty.</param>
/// <exception cref="TokenException">Thrown when <paramref name="name"/> is null or empty.</exception>
/// <remarks>
/// The duplicate check walks from the newest local downwards and stops at the first
/// initialised local that belongs to an enclosing scope. Walking upwards from slot 0
/// would hit the reserved depth-0 slot first and stop before checking anything.
/// </remarks>
internal void DeclareLocal(string? name)
{
    if (Builder.ScopeDepth == 0)
        return;
#if LOG
    _logger.Method(name);
#endif
    string identifier = ThrowIfEmpty(name);

    for (int i = (int)Builder.Locals.Count - 1; i >= 0; i--) {
        Local local = Builder.Locals[i];
        // Stop as soon as we reach a local that is not part of the current scope.
        // Depth -1 marks a declared but not yet initialised local and is still checked.
        if (local.Depth != -1 && local.Depth < Builder.ScopeDepth)
            break;
        if (identifier.Equals(local.Name))
            ErrorAtCurrent($"A variable by the name {name} already exists in the current scope.");
    }

    AddLocal(name);
}
/// <summary>
/// Registers an outer variable reference on <paramref name="builder"/>, reusing an
/// existing entry with the same index and locality if one is found.
/// </summary>
/// <param name="builder">Builder that receives the reference.</param>
/// <param name="index">Slot index in the enclosing builder.</param>
/// <param name="isLocal">Whether <paramref name="index"/> refers to a local of the directly enclosing builder.</param>
/// <returns>Index of the matching or newly added entry in <c>builder.Outers</c>.</returns>
internal long AddOuter(Builder builder, long index, bool isLocal)
{
    var wanted = new Outer(index, isLocal);

    int slot = 0;
    while (slot < builder.Function.OuterCount) {
        // Record equality compares Index and IsLocal.
        if (builder.Outers[slot].Equals(wanted)) {
            return slot;
        }
        slot++;
    }

    builder.Outers.Push(wanted);
    return builder.Outers.Count - 1;
}
/// <summary>
/// Resolves <paramref name="name"/> as a variable of an enclosing function. The
/// directly enclosing builder's locals are tried first; failing that, the lookup
/// recurses outwards. Every builder on the way registers an outer reference.
/// </summary>
/// <param name="builder">Builder from whose point of view the name is resolved.</param>
/// <param name="name">Name of the variable to find.</param>
/// <returns>Index of the outer reference in <paramref name="builder"/>, or -1 if the name was not found.</returns>
internal long ResolveOuter(Builder builder, string name)
{
    var enclosing = builder.Outer;
    if (enclosing == null) {
        return -1;
    }

    long slot = ResolveLocal(enclosing, name);
    if (slot > -1) {
        // Flag the local as captured so that EndScope emits CloseOuter for it instead of Pop.
        var captured = enclosing.Locals.Get(slot);
        captured.IsCaptured = true;
        enclosing.Locals.Set(slot, captured);
        return AddOuter(builder, slot, true);
    }

    long inherited = ResolveOuter(enclosing, name);
    return inherited > -1
        ? AddOuter(builder, inherited, false)
        : -1;
}
/// <summary>
/// Stores an identifier name as a string constant of the current function.
/// </summary>
/// <param name="name">Identifier text; must not be null.</param>
/// <returns>The value returned by <see cref="MakeConstant"/> for the new constant.</returns>
/// <exception cref="TokenException">Thrown when <paramref name="name"/> is null.</exception>
internal long IdentifierConstant(string? name)
{
#if LOG
    _logger.Method(name);
#endif
    if (name == null) {
        throw new TokenException("Empty string value for identifier detected", _reader, Current);
    }

    return MakeConstant(String.Make(name));
}
/// <summary>
/// Adds <paramref name="value"/> to the constants of the current function's segment.
/// </summary>
/// <param name="value">Value to register.</param>
/// <returns>Whatever <c>Constants.Add</c> returned; the log output treats it as the constant's index.</returns>
internal long MakeConstant(Value value)
{
#if LOG
    _logger.Method(value.ToString());
#endif
    long index = Function.Segment.Constants.Add(value);
#if LOG
    _logger.Verbose($"Registered constant {value} at index {index}");
#endif
    return index;
}
/// <summary>
/// Consumes an identifier and declares it as a variable.
/// </summary>
/// <returns>
/// At scope depth 0 the constant index of the identifier's name; otherwise 0, because
/// the variable was declared as a local and needs no name constant.
/// </returns>
internal long ParseVariable()
{
#if LOG
    _logger.Method();
#endif
    Token identifier = Consume(T.Identifier, "Missing identifier for variable");
    var name = identifier.Value;

    if (Builder.ScopeDepth != 0) {
        DeclareLocal(name);
        return 0;
    }

    return IdentifierConstant(name);
}
/// <summary>
/// Marks the most recently pushed local as initialised by giving it the current
/// scope depth. Does nothing at scope depth 0.
/// </summary>
internal void InitializeLocal()
{
    if (Builder.ScopeDepth == 0) {
        return;
    }
#if LOG
    _logger.Method();
#endif
    // Local is a value type: take a copy of the top entry, change it and write it back.
    Local top = Builder.Locals.Peek();
    Builder.Locals.Set(Builder.Locals.Position - 1, top with { Depth = Builder.ScopeDepth });
}
/// <summary>
/// Completes a variable declaration. Inside a scope the newest local is marked as
/// initialised; at scope depth 0 a DefineGlobal instruction followed by
/// <paramref name="index"/> is emitted.
/// </summary>
/// <param name="index">Constant index of the variable name; only used at scope depth 0.</param>
internal void DefineVariable(long index)
{
#if LOG
    _logger.Method(index.ToString());
#endif
    if (Builder.ScopeDepth > 0) {
        InitializeLocal();
        return;
    }

    Emit(Op.DefineGlobal);
    EmitDynamic(index);
}
/// <summary>
/// Compiles a comma separated list of call arguments up to and including the closing
/// group token.
/// </summary>
/// <returns>Number of arguments that were compiled.</returns>
internal byte Arguments()
{
    byte parsed = 0;

    bool more = !Check(T.GroupClose);
    while (more) {
        ParseExpression();

        // Test against the value before counting this argument, then count it.
        bool tooMany = parsed >= 0xFF;
        parsed++;
        if (tooMany) {
            ErrorAtCurrent("How many arguments do you need?");
        }

        more = Match(T.Comma);
    }

    Consume(T.GroupClose, "Expected ')' after argument list");
    return parsed;
}
/// <summary>
/// Primary Emit: appends the given bytes, in order, to the instructions of the
/// current function's segment. All other Emit overloads end up here.
/// </summary>
/// <param name="data">Bytes to append.</param>
internal void Emit(params byte[] data)
{
#if LOG
    if (data.Length == 1)
        _logger.Verbose($"Emitting {new Instruction(data[0])}");
    else if (data.Length > 1)
        _logger.Verbose($"Emitting {string.Join(' ', data)}");
#endif
    for (int i = 0; i < data.Length; i++) {
        Function.Segment.Instructions.Add(data[i]);
    }
}
/// <summary>
/// Overwrites already emitted instruction bytes, starting at <paramref name="position"/>.
/// </summary>
/// <param name="bytes">Replacement bytes.</param>
/// <param name="position">Index of the first instruction byte to overwrite.</param>
internal void Patch(byte[] bytes, long position)
{
    long cursor = position;
    foreach (byte replacement in bytes) {
        Function.Segment.Instructions[cursor] = replacement;
        cursor++;
    }
}
/// <summary>
/// Emits a Constant instruction followed by the index under which
/// <paramref name="constant"/> was registered.
/// </summary>
/// <param name="constant">Value to load.</param>
internal void EmitConstant(Value constant)
{
    Emit(Op.Constant);

    long index = MakeConstant(constant);
    EmitDynamic(index);
}
/// <summary>
/// Emits a jump instruction with an 8-byte placeholder operand that is filled in
/// later by <see cref="PatchJump"/>.
/// </summary>
/// <param name="instruction">Jump instruction to emit.</param>
/// <returns>Position of the placeholder operand within the instruction stream.</returns>
internal long EmitJump(Instruction instruction)
{
    const long placeholder = -1L;

    Emit(instruction);
    Emit(placeholder);

    // The operand is the last sizeof(long) bytes that were written.
    return CurrentInstruction - sizeof(long);
}
/// <summary>
/// Fills in the operand of a jump emitted by <see cref="EmitJump"/> so that it
/// lands on the current end of the instruction stream.
/// </summary>
/// <param name="offset">Operand position as returned by <see cref="EmitJump"/>.</param>
internal void PatchJump(long offset)
{
    // Distance from the byte right after the operand to the current position.
    long distance = CurrentInstruction - (offset + sizeof(long));

    var encoded = distance.GetBytes();
    Patch(encoded, offset);
}
/// <summary>
/// Emits a Loop instruction followed by an 8-byte distance computed from
/// <paramref name="start"/>.
/// </summary>
/// <param name="start">Instruction position the loop should return to.</param>
internal void EmitLoop(long start)
{
    Emit(Op.Loop);

    // Measured after the Loop opcode has been written; the operand's own size is added on top.
    long distance = CurrentInstruction - start + sizeof(long);
    Emit(distance);
}
/// <summary>
/// Emits a dynamic range of bytes into the instruction set. If there is more than
/// one byte, a marker byte (length with the 0x80 bit set) is written first; a single
/// byte is written as is.
/// </summary>
/// <param name="data">Bytes to emit.</param>
internal void EmitDynamic(byte[] data)
{
    bool needsLengthMarker = data.Length > 1;
    if (needsLengthMarker) {
        // 0x80 flags the byte as a length marker; anything else is a direct value.
        Emit((byte)(data.Length | 0x80));
    }

    Emit(data);
}
/// <summary>
/// Emits <paramref name="instruction"/> and then <paramref name="data"/> in the
/// dynamic encoding of <see cref="EmitDynamic(long)"/>.
/// </summary>
/// <param name="instruction">Instruction to emit first.</param>
/// <param name="data">Operand written after the instruction.</param>
internal void EmitDynamic(Instruction instruction, long data)
{
    Emit(instruction);

    long operand = data;
    EmitDynamic(operand);
}
/// <summary>
/// Converts <paramref name="value"/> with <c>GetDynamicBytes</c> and emits the result
/// through <see cref="EmitDynamic(byte[])"/>.
/// </summary>
/// <param name="value">Value to emit.</param>
internal void EmitDynamic(long value)
{
    var encoded = value.GetDynamicBytes();
    EmitDynamic(encoded);
}
/// <summary>Emits the code of a single instruction.</summary>
/// <param name="instruction">Instruction whose <c>Code</c> is emitted.</param>
internal void Emit(Instruction instruction)
    => Emit(instruction.Code);
/// <summary>
/// Emits an instruction immediately followed by raw operand bytes (no length marker).
/// </summary>
/// <param name="instruction">Instruction to emit first.</param>
/// <param name="data">Operand bytes written verbatim after the instruction.</param>
internal void Emit(Instruction instruction, byte[] data)
{
    Emit(instruction);

    byte[] operand = data;
    Emit(operand);
}
/// <summary>Emits every instruction of the sequence, in order.</summary>
/// <param name="instructions">Instructions to emit.</param>
internal void Emit(IEnumerable<Instruction> instructions)
{
    foreach (Instruction instruction in instructions) {
        Emit(instruction);
    }
}
/// <summary>
/// Emits the implicit return sequence: constructors load local slot 0 as their
/// result, every other function type loads null; a Return instruction follows.
/// </summary>
internal void EmitReturn()
{
    if (Builder.Type != FunctionType.Constructor) {
        Emit(Op.Null);
    } else {
        // Slot 0 is the reserved first local, which CreateBuilder names "this" for constructors.
        Emit(Op.GetLocal, 0);
    }

    Emit(Op.Return);
}
/// <summary>Emits the given instructions, in order.</summary>
/// <param name="instructions">Instructions to emit.</param>
internal void Emit(params Instruction[] instructions)
{
    // AsEnumerable() changes the static type so that the IEnumerable overload is
    // selected instead of this method calling itself.
    Emit(instructions.AsEnumerable());
}
/// <summary>Emits the bytes that <see cref="BitConverter.GetBytes(short)"/> returns for <paramref name="value"/>.</summary>
/// <param name="value">Value to emit.</param>
internal void Emit(short value)
{
    byte[] raw = BitConverter.GetBytes(value);
    Emit(raw);
}
/// <summary>Emits the bytes that <see cref="BitConverter.GetBytes(int)"/> returns for <paramref name="value"/>.</summary>
/// <param name="value">Value to emit.</param>
internal void Emit(int value)
{
    byte[] raw = BitConverter.GetBytes(value);
    Emit(raw);
}
/// <summary>Emits the bytes that <see cref="BitConverter.GetBytes(long)"/> returns for <paramref name="value"/>.</summary>
/// <param name="value">Value to emit.</param>
internal void Emit(long value)
{
    byte[] raw = BitConverter.GetBytes(value);
    Emit(raw);
}
/// <summary>Emits the bytes that <see cref="BitConverter.GetBytes(ulong)"/> returns for <paramref name="value"/>.</summary>
/// <param name="value">Value to emit.</param>
internal void Emit(ulong value)
{
    byte[] raw = BitConverter.GetBytes(value);
    Emit(raw);
}
/// <summary>Emits the bytes that <see cref="BitConverter.GetBytes(double)"/> returns for <paramref name="value"/>.</summary>
/// <param name="value">Value to emit.</param>
internal void Emit(double value)
{
    byte[] raw = BitConverter.GetBytes(value);
    Emit(raw);
}
/// <summary>
/// Advances by one token: the current token becomes <see cref="Previous"/> and the
/// next token from the reader becomes <see cref="Current"/>.
/// </summary>
/// <returns>The new current token.</returns>
public Token Next()
{
    Previous = Current;
    Current = _reader.NextToken();
    return Current;
}
/// <summary>
/// Consumes the current token if it has the expected type. With
/// <paramref name="acceptEof"/> set, an Eof token is accepted in its place. Otherwise
/// an error is reported at the current token.
/// </summary>
/// <param name="expected">Token type that is required here.</param>
/// <param name="errorMessage">Message to report when the token does not match.</param>
/// <param name="acceptEof">Whether an Eof token also satisfies the expectation.</param>
/// <returns>The consumed token, or an error token carrying <paramref name="errorMessage"/> on a mismatch.</returns>
internal Token Consume(T expected, string errorMessage, bool acceptEof = true)
{
    bool eofTolerated = acceptEof && Current.Type == T.Eof;
    bool satisfied = eofTolerated || Current.Type == expected;

    if (!satisfied) {
        ErrorAtCurrent(errorMessage);
        return Token.Error(errorMessage);
    }

    Next();
    return Previous;
}
/// <summary>
/// Consumes the current token if, and only if, it has the given type.
/// </summary>
/// <param name="type">Token type to look for.</param>
/// <returns>True if the token matched and was consumed, false otherwise.</returns>
internal bool Match(T type)
{
    if (Check(type)) {
        Next();
        return true;
    }

    return false;
}
/// <summary>Tests the type of the current token without consuming it.</summary>
/// <param name="type">Token type to compare against.</param>
/// <returns>True if the current token has that type.</returns>
internal bool Check(T type)
{
    return Current.Type == type;
}
/// <summary>Reports an error located at the given token.</summary>
/// <param name="token">Token the error refers to.</param>
/// <param name="errorMessage">Description of the problem.</param>
internal void ErrorAt(Token token, string errorMessage)
{
    ReportError(token, errorMessage);
}
/// <summary>Reports an error located at the current token.</summary>
/// <param name="errorMessage">Description of the problem.</param>
internal void ErrorAtCurrent(string errorMessage)
{
    ReportError(Current, errorMessage);
}
/// <summary>Reports an error located at the previously consumed token.</summary>
/// <param name="errorMessage">Description of the problem.</param>
internal void ErrorAtPrevious(string errorMessage)
{
    ReportError(Previous, errorMessage);
}
/// <summary>
/// Records the position of an error and then either throws (when
/// <see cref="ThrowOnError"/> is set) or raises the <see cref="Error"/> event.
/// </summary>
/// <param name="token">Token the error refers to.</param>
/// <param name="errorMessage">Description of the problem.</param>
/// <exception cref="TokenException">Thrown when <see cref="ThrowOnError"/> is true.</exception>
private void ReportError(Token token, string errorMessage)
{
#if LOG
    _logger.Method();
#endif
    ErrorPosition = token.Position;
#if LOG
    _logger.Error($"At token index {token.Position}: {errorMessage}");
#endif
    if (!ThrowOnError) {
        Error?.Invoke(token, errorMessage);
        return;
    }

    throw new TokenException($"At token index {token.Position}: {errorMessage}", _reader, token);
}
/// <summary>
/// Returns <paramref name="value"/> unchanged unless it is null or empty, in which
/// case a <see cref="TokenException"/> is thrown.
/// </summary>
/// <param name="value">String that must be non-empty.</param>
/// <param name="message">
/// NOTE(review): currently unused — the exception always carries the fixed text below.
/// Confirm whether this parameter is meant to replace that text.
/// </param>
/// <returns>The validated, non-empty string.</returns>
/// <exception cref="TokenException">Thrown when <paramref name="value"/> is null or empty.</exception>
private string ThrowIfEmpty(string? value, string? message = $"Unexpected empty string detected")
{
    if (!string.IsNullOrEmpty(value)) {
        return value;
    }

    throw new TokenException("Empty string value for identifier detected", _reader, Current);
}
}