feat: unify AST structures, introduce MIR and update codegen

This commit is contained in:
2026-04-21 22:08:15 +02:00
parent 22023a8734
commit 3e0b5c5b00
10 changed files with 686 additions and 307 deletions
+58 -23
View File
@@ -1,24 +1,58 @@
use crate::frontend::sema::Ty;
use crate::frontend::token::Span;
use std::fmt::Debug;
#[derive(Debug, PartialEq, Eq)]
pub struct Module {
pub decls: Vec<Decl>,
/// Selects the payload types that differ between compilation phases of the AST.
///
/// The AST node types are generic over a `Phase`; [`Untyped`] and [`Typed`] are
/// the two implementors. The `Debug + PartialEq + Eq` bounds on the trait and on
/// each associated type match the traits the node types derive.
pub trait Phase: Debug + PartialEq + Eq {
/// Type stored in the `return_type` field of `DeclKind::Function`.
type ReturnType: Debug + PartialEq + Eq;
/// Element type of the `params` list of `DeclKind::Function`.
type ParamType: Debug + PartialEq + Eq;
/// Type stored in the `ty` field of every `Expr`.
type ExprType: Debug + PartialEq + Eq;
}
#[derive(Debug, PartialEq, Eq)]
pub struct Decl {
pub kind: DeclKind,
/// Phase marker for the AST before type information is attached.
///
/// This is the default phase parameter of the generic node types, and the
/// phase the parser constructs: its expressions carry `ty: ()`.
pub struct Untyped;
impl Phase for Untyped {
// Optional syntactic type; presumably `None` means no return type was written
// in the source — TODO confirm against the parser.
type ReturnType = Option<Type>;
// Parameters keep their syntactic `FunctionParam` form.
type ParamType = FunctionParam;
// No type is known yet, so the `ty` slot holds the unit value.
type ExprType = ();
}
/// Phase marker for the AST built by semantic analysis, in which type slots
/// hold [`Ty`] values.
#[derive(Debug, PartialEq, Eq)]
pub struct Typed;
impl Phase for Typed {
// The return type is always a `Ty` (no `Option`).
type ReturnType = Ty;
// A parameter is a `(String, Ty)` pair — presumably (name, type); confirm in `Sema`.
type ParamType = (String, Ty);
// Every expression records its `Ty`.
type ExprType = Ty;
}
// Aliases for the generic AST node types instantiated with the `Typed` phase.

/// [`Module`] in the [`Typed`] phase.
pub type TypedModule = Module<Typed>;
/// [`Decl`] in the [`Typed`] phase.
pub type TypedDecl = Decl<Typed>;
/// [`DeclKind`] in the [`Typed`] phase.
pub type TypedDeclKind = DeclKind<Typed>;
/// [`Stmt`] in the [`Typed`] phase.
pub type TypedStmt = Stmt<Typed>;
/// [`StmtKind`] in the [`Typed`] phase.
pub type TypedStmtKind = StmtKind<Typed>;
/// [`Expr`] in the [`Typed`] phase.
pub type TypedExpr = Expr<Typed>;
/// [`ExprKind`] in the [`Typed`] phase.
pub type TypedExprKind = ExprKind<Typed>;
/// A module: the list of declarations it contains.
///
/// Generic over the [`Phase`] `P`, which defaults to [`Untyped`].
#[derive(Debug, PartialEq, Eq)]
pub struct Module<P: Phase = Untyped> {
/// The module's declarations.
pub decls: Vec<Decl<P>>,
}
/// A declaration: its phase-specific payload plus the source [`Span`] attached to it.
///
/// Generic over the [`Phase`] `P`, which defaults to [`Untyped`].
#[derive(Debug, PartialEq, Eq)]
pub struct Decl<P: Phase = Untyped> {
/// What kind of declaration this is, with its contents.
pub kind: DeclKind<P>,
/// Source span recorded for this declaration.
pub span: Span,
}
#[derive(Debug, PartialEq, Eq)]
pub enum DeclKind {
pub enum DeclKind<P: Phase = Untyped> {
Function {
name: String,
name_span: Span,
params: Vec<FunctionParam>,
return_type: Option<Type>,
body: Stmt,
params: Vec<P::ParamType>,
return_type: P::ReturnType,
body: Stmt<P>,
},
}
@@ -49,34 +83,35 @@ pub enum TypeKind {
}
#[derive(Debug, PartialEq, Eq)]
pub struct Stmt {
pub kind: StmtKind,
pub struct Stmt<P: Phase = Untyped> {
pub kind: StmtKind<P>,
pub span: Span,
}
#[derive(Debug, PartialEq, Eq)]
pub enum StmtKind {
pub enum StmtKind<P: Phase = Untyped> {
Compound {
inner: Vec<Stmt>,
inner: Vec<Stmt<P>>,
},
If {
condition: Expr,
then: Box<Stmt>,
elze: Option<Box<Stmt>>,
condition: Expr<P>,
then: Box<Stmt<P>>,
elze: Option<Box<Stmt<P>>>,
},
Return {
value: Option<Expr>,
value: Option<Expr<P>>,
},
}
#[derive(Debug, PartialEq, Eq)]
pub struct Expr {
pub kind: ExprKind,
pub struct Expr<P: Phase = Untyped> {
pub kind: ExprKind<P>,
pub ty: P::ExprType,
pub span: Span,
}
#[derive(Debug, PartialEq, Eq)]
pub enum ExprKind {
pub enum ExprKind<P: Phase = Untyped> {
Identifier {
name: String,
},
@@ -88,12 +123,12 @@ pub enum ExprKind {
},
Unary {
op: UnaryOp,
expr: Box<Expr>,
expr: Box<Expr<P>>,
},
Binary {
op: BinaryOp,
lhs: Box<Expr>,
rhs: Box<Expr>,
lhs: Box<Expr<P>>,
rhs: Box<Expr<P>>,
},
}
-1
View File
@@ -3,4 +3,3 @@ pub mod lexer;
pub mod parser;
pub mod sema;
pub mod token;
pub mod typed_ast;
+29
View File
@@ -431,6 +431,7 @@ impl<'src> Parser<'src> {
lhs: Box::new(lhs),
rhs: Box::new(rhs),
},
ty: (),
span,
};
}
@@ -451,6 +452,7 @@ impl<'src> Parser<'src> {
kind: ExprKind::Identifier {
name: token.text.to_string(),
},
ty: (),
span: token.span,
})
}
@@ -472,6 +474,7 @@ impl<'src> Parser<'src> {
Ok(Expr {
kind: ExprKind::Integer { value },
ty: (),
span: token.span,
})
}
@@ -483,6 +486,7 @@ impl<'src> Parser<'src> {
kind: ExprKind::Boolean {
value: token.text == "true",
},
ty: (),
span: token.span,
})
}
@@ -494,6 +498,7 @@ impl<'src> Parser<'src> {
Ok(Expr {
kind: expr.kind,
ty: (),
span: lparen.span.join(rparen.span),
})
}
@@ -503,6 +508,7 @@ impl<'src> Parser<'src> {
let rhs = self.parse_expr_bp(r_bp)?;
Ok(Expr {
ty: (),
span: op_token.span.join(rhs.span),
kind: ExprKind::Unary {
op,
@@ -598,6 +604,7 @@ mod test {
parse("0xBEEF;", Parser::parse_expr),
Success(Expr {
kind: ExprKind::Integer { value: 0xBEEF },
ty: (),
span: Span::new(0, 6)
})
);
@@ -606,6 +613,7 @@ mod test {
parse("0o777;", Parser::parse_expr),
Success(Expr {
kind: ExprKind::Integer { value: 0o777 },
ty: (),
span: Span::new(0, 5)
})
);
@@ -614,6 +622,7 @@ mod test {
parse("0b1001;", Parser::parse_expr),
Success(Expr {
kind: ExprKind::Integer { value: 0b1001 },
ty: (),
span: Span::new(0, 6)
})
);
@@ -622,6 +631,7 @@ mod test {
parse("1337;", Parser::parse_expr),
Success(Expr {
kind: ExprKind::Integer { value: 1337 },
ty: (),
span: Span::new(0, 4)
})
);
@@ -633,6 +643,7 @@ mod test {
parse("true;", Parser::parse_expr),
Success(Expr {
kind: ExprKind::Boolean { value: true },
ty: (),
span: Span::new(0, 4)
})
);
@@ -641,6 +652,7 @@ mod test {
parse("false;", Parser::parse_expr),
Success(Expr {
kind: ExprKind::Boolean { value: false },
ty: (),
span: Span::new(0, 5)
})
);
@@ -655,9 +667,11 @@ mod test {
op: UnaryOp::Neg,
expr: Box::new(Expr {
kind: ExprKind::Integer { value: 5 },
ty: (),
span: Span::new(1, 2)
})
},
ty: (),
span: Span::new(0, 2)
})
);
@@ -672,6 +686,7 @@ mod test {
op: BinaryOp::Add,
lhs: Box::new(Expr {
kind: ExprKind::Integer { value: 12 },
ty: (),
span: Span::new(0, 2)
}),
rhs: Box::new(Expr {
@@ -679,16 +694,20 @@ mod test {
op: BinaryOp::Mul,
lhs: Box::new(Expr {
kind: ExprKind::Integer { value: 3 },
ty: (),
span: Span::new(5, 6)
}),
rhs: Box::new(Expr {
kind: ExprKind::Integer { value: 6 },
ty: (),
span: Span::new(9, 10)
})
},
ty: (),
span: Span::new(5, 10)
})
},
ty: (),
span: Span::new(0, 10)
})
);
@@ -710,6 +729,7 @@ mod test {
kind: StmtKind::Return {
value: Some(Expr {
kind: ExprKind::Integer { value: 0 },
ty: (),
span: Span::new(7, 8)
})
},
@@ -726,6 +746,7 @@ mod test {
kind: StmtKind::If {
condition: Expr {
kind: ExprKind::Boolean { value: true },
ty: (),
span: Span::new(3, 7)
},
then: Box::new(Stmt {
@@ -818,15 +839,18 @@ mod test {
kind: ExprKind::Identifier {
name: "a".to_string()
},
ty: (),
span: Span::new(39, 40)
}),
rhs: Box::new(Expr {
kind: ExprKind::Identifier {
name: "b".to_string()
},
ty: (),
span: Span::new(43, 44)
})
},
ty: (),
span: Span::new(39, 44)
})
},
@@ -852,13 +876,16 @@ mod test {
kind: ExprKind::Identifier {
name: "a".to_string()
},
ty: (),
span: Span::new(0, 1)
}),
rhs: Box::new(Expr {
kind: ExprKind::Integer { value: 5 },
ty: (),
span: Span::new(5, 6)
})
},
ty: (),
span: Span::new(0, 6)
})
);
@@ -873,9 +900,11 @@ mod test {
op: UnaryOp::Not,
expr: Box::new(Expr {
kind: ExprKind::Boolean { value: true },
ty: (),
span: Span::new(1, 5)
})
},
ty: (),
span: Span::new(0, 5)
})
);
+58 -88
View File
@@ -2,7 +2,6 @@ use std::collections::HashMap;
use crate::frontend::ast::*;
use crate::frontend::token::Span;
use crate::frontend::typed_ast::*;
/// A structured error produced during semantic analysis, carrying a human-readable
/// message and the [Span] of the offending AST node for precise diagnostics.
@@ -79,7 +78,6 @@ pub struct Sema {
deferred_unary_neg: Vec<(Span, Ty, Ty, Option<u64>)>,
deferred_binary: Vec<(Span, Ty)>,
deferred_literals: Vec<(Span, Ty)>,
is_reachable: bool,
}
impl Sema {
@@ -93,7 +91,6 @@ impl Sema {
deferred_unary_neg: Vec::new(),
deferred_binary: Vec::new(),
deferred_literals: Vec::new(),
is_reachable: true,
}
}
@@ -245,10 +242,10 @@ impl Sema {
match &decl.kind {
DeclKind::Function {
name,
name_span,
params,
return_type,
body,
..
} => {
let mut typed_params = Vec::new();
@@ -265,24 +262,19 @@ impl Sema {
.map(|t| Ty::from(&t.kind))
.unwrap_or(Ty::Unit);
self.is_reachable = true;
let typed_body = self.analyze_stmt(body, &expected_ret_ty);
if expected_ret_ty != Ty::Unit && self.is_reachable {
self.errors.push(SemanticError::new(
"not all control paths return a value",
decl.span,
));
}
self.leave_scope();
TypedDecl::Function {
name: name.clone(),
params: typed_params,
return_type: expected_ret_ty,
body: typed_body,
TypedDecl {
kind: TypedDeclKind::Function {
name: name.clone(),
name_span: *name_span,
params: typed_params,
return_type: expected_ret_ty,
body: typed_body,
},
span: decl.span,
}
}
}
@@ -294,22 +286,19 @@ impl Sema {
match &stmt.kind {
StmtKind::Compound { inner } => {
let mut typed_inner = Vec::new();
let mut reported_unreachable = false;
self.enter_scope();
for s in inner {
if !self.is_reachable && !reported_unreachable {
self.errors
.push(SemanticError::new("unreachable statement", s.span));
reported_unreachable = true;
}
typed_inner.push(self.analyze_stmt(s, expected_ret_ty));
}
self.leave_scope();
TypedStmt::Compound { inner: typed_inner }
TypedStmt {
kind: TypedStmtKind::Compound { inner: typed_inner },
span: stmt.span,
}
}
StmtKind::If {
condition,
@@ -322,29 +311,16 @@ impl Sema {
self.errors.push(SemanticError::new(err, condition.span));
}
let initial_reachable = self.is_reachable;
self.is_reachable = initial_reachable;
let typed_then = self.analyze_stmt(then, expected_ret_ty);
let reachable_after_then = self.is_reachable;
let typed_elze = elze.as_ref().map(|e| self.analyze_stmt(e, expected_ret_ty));
let typed_elze = elze.as_ref().map(|e| {
self.is_reachable = initial_reachable;
self.analyze_stmt(e, expected_ret_ty)
});
let reachable_after_else = if elze.is_some() {
self.is_reachable
} else {
initial_reachable
};
self.is_reachable = reachable_after_then || reachable_after_else;
TypedStmt::If {
condition: typed_condition,
then: Box::new(typed_then),
elze: typed_elze.map(Box::new),
TypedStmt {
kind: TypedStmtKind::If {
condition: typed_condition,
then: Box::new(typed_then),
elze: typed_elze.map(Box::new),
},
span: stmt.span,
}
}
StmtKind::Return { value } => {
@@ -355,19 +331,21 @@ impl Sema {
self.errors.push(SemanticError::new(err, expr.span));
}
self.is_reachable = false;
TypedStmt::Return {
value: Some(typed_expr),
TypedStmt {
kind: TypedStmtKind::Return {
value: Some(typed_expr),
},
span: stmt.span,
}
} else {
if let Err(err) = self.unify(&Ty::Unit, expected_ret_ty) {
self.errors.push(SemanticError::new(err, stmt.span));
}
self.is_reachable = false;
TypedStmt::Return { value: None }
TypedStmt {
kind: TypedStmtKind::Return { value: None },
span: stmt.span,
}
}
}
}
@@ -392,6 +370,7 @@ impl Sema {
TypedExpr {
kind: TypedExprKind::Identifier { name: name.clone() },
ty,
span: expr.span,
}
}
@@ -402,12 +381,14 @@ impl Sema {
TypedExpr {
kind: TypedExprKind::Integer { value: *value },
ty,
span: expr.span,
}
}
ExprKind::Boolean { value } => TypedExpr {
kind: TypedExprKind::Boolean { value: *value },
ty: Ty::Bool,
span: expr.span,
},
ExprKind::Unary {
@@ -435,6 +416,7 @@ impl Sema {
expr: Box::new(typed_inner),
},
ty: result_ty,
span: expr.span,
}
}
@@ -454,6 +436,7 @@ impl Sema {
expr: Box::new(typed_inner),
},
ty: Ty::Bool,
span: expr.span,
}
}
@@ -490,6 +473,7 @@ impl Sema {
rhs: Box::new(typed_rhs),
},
ty: result_ty,
span: expr.span,
}
}
}
@@ -497,9 +481,11 @@ impl Sema {
/// Recursively applies the final resolved type substitutions to a typed declaration.
fn apply_subst_decl(&self, decl: TypedDecl) -> TypedDecl {
match decl {
TypedDecl::Function {
let span = decl.span;
let kind = match decl.kind {
TypedDeclKind::Function {
name,
name_span,
params,
return_type,
body,
@@ -509,45 +495,52 @@ impl Sema {
.map(|(n, ty)| (n, self.apply_subst(&ty)))
.collect();
TypedDecl::Function {
TypedDeclKind::Function {
name,
name_span,
params,
return_type: self.apply_subst(&return_type),
body: self.apply_subst_stmt(body),
}
}
}
};
TypedDecl { kind, span }
}
/// Recursively applies the final resolved type substitutions to a typed statement.
fn apply_subst_stmt(&self, stmt: TypedStmt) -> TypedStmt {
match stmt {
TypedStmt::Compound { inner } => TypedStmt::Compound {
let span = stmt.span;
let kind = match stmt.kind {
TypedStmtKind::Compound { inner } => TypedStmtKind::Compound {
inner: inner
.into_iter()
.map(|s| self.apply_subst_stmt(s))
.collect(),
},
TypedStmt::If {
TypedStmtKind::If {
condition,
then,
elze,
} => TypedStmt::If {
} => TypedStmtKind::If {
condition: self.apply_subst_expr(condition),
then: Box::new(self.apply_subst_stmt(*then)),
elze: elze.map(|s| Box::new(self.apply_subst_stmt(*s))),
},
TypedStmt::Return { value } => TypedStmt::Return {
TypedStmtKind::Return { value } => TypedStmtKind::Return {
value: value.map(|e| self.apply_subst_expr(e)),
},
}
};
TypedStmt { kind, span }
}
/// Recursively applies the final resolved type substitutions to a typed expression.
fn apply_subst_expr(&self, expr: TypedExpr) -> TypedExpr {
let ty = self.apply_subst(&expr.ty);
let span = expr.span;
let kind = match expr.kind {
TypedExprKind::Identifier { name } => TypedExprKind::Identifier { name },
TypedExprKind::Integer { value } => TypedExprKind::Integer { value },
@@ -565,7 +558,7 @@ impl Sema {
},
};
TypedExpr { kind, ty }
TypedExpr { kind, ty, span }
}
/// Resolves all deferred type constraints accumulated during analysis, such as
@@ -682,9 +675,9 @@ impl Sema {
#[cfg(test)]
mod test {
use crate::frontend::{
ast::TypedModule,
parser::Parser,
sema::{Sema, SemanticError},
typed_ast::TypedModule,
};
fn analyze(source: &str) -> Result<TypedModule, Vec<SemanticError>> {
@@ -804,27 +797,4 @@ mod test {
let src = "fn test() { if 12 {} }";
assert!(analyze(src).is_err());
}
/// Functions with a non-unit return type must return on every control path.
#[test]
fn not_all_paths_return() {
    // Both bodies can fall off the end without returning a value.
    let rejected = [
        "fn test(a: i32) -> i32 { if a < 5 { return 5; } else { } }",
        "fn test() -> i32 { }",
    ];
    for &src in &rejected {
        assert!(analyze(src).is_err());
    }

    // The trailing `return` covers the path that skips the `if`.
    let accepted = "fn test(a: i32) -> i32 { if a < 5 { return 5; } return 10; }";
    assert!(analyze(accepted).is_ok());
}
/// A statement following a `return` must be reported as unreachable.
#[test]
fn unreachable_code() {
    let diagnostics = analyze("fn test() -> i32 { return 5; return 10; }").unwrap_err();
    let reported = diagnostics
        .iter()
        .any(|diag| diag.message.contains("unreachable statement"));
    assert!(reported);
}
}
-60
View File
@@ -1,60 +0,0 @@
use crate::frontend::ast::{BinaryOp, UnaryOp};
use crate::frontend::sema::Ty;
/// A module after type annotation: the list of its typed declarations.
#[derive(Debug, PartialEq, Eq)]
pub struct TypedModule {
/// The module's declarations.
pub decls: Vec<TypedDecl>,
}
/// A typed declaration. Functions are the only variant.
#[derive(Debug, PartialEq, Eq)]
pub enum TypedDecl {
/// A function definition.
Function {
/// The function's name.
name: String,
/// Parameters as `(String, Ty)` pairs — presumably (name, type); confirm in `Sema`.
params: Vec<(String, Ty)>,
/// The function's return type.
return_type: Ty,
/// The function body.
body: TypedStmt,
},
}
/// A typed statement.
#[derive(Debug, PartialEq, Eq)]
pub enum TypedStmt {
/// A sequence of statements grouped together.
Compound {
/// The contained statements, in order.
inner: Vec<TypedStmt>,
},
/// A conditional with an optional alternative branch.
If {
/// The expression deciding which branch is taken.
condition: TypedExpr,
/// Statement for the `then` branch.
then: Box<TypedStmt>,
/// Statement for the `else` branch, if one exists (`else` itself is a Rust keyword).
elze: Option<Box<TypedStmt>>,
},
/// A `return`, with or without a value.
Return {
/// The returned expression; `None` when there is no value.
value: Option<TypedExpr>,
},
}
/// An expression paired with the type assigned to it.
#[derive(Debug, PartialEq, Eq)]
pub struct TypedExpr {
/// The shape of the expression and its operands.
pub kind: TypedExprKind,
/// The expression's type.
pub ty: Ty,
}
/// The different forms a typed expression can take.
#[derive(Debug, PartialEq, Eq)]
pub enum TypedExprKind {
/// A reference to a name.
Identifier {
/// The referenced name.
name: String,
},
/// An integer literal.
Integer {
/// The literal's value, stored as `u64`.
value: u64,
},
/// A boolean literal.
Boolean {
/// The literal's value.
value: bool,
},
/// A unary operator applied to one operand.
Unary {
/// The operator.
op: UnaryOp,
/// The operand.
expr: Box<TypedExpr>,
},
/// A binary operator applied to two operands.
Binary {
/// The operator.
op: BinaryOp,
/// The left-hand operand.
lhs: Box<TypedExpr>,
/// The right-hand operand.
rhs: Box<TypedExpr>,
},
}