Add statement parsing with error recovery

- ast.rs: add Type, Block, ElseBranch, Stmt, StmtKind
- parser.rs: parse_type, parse_block, parse_stmt and all sub-parsers;
  missing-token insertion via expect(), synchronize() for panic-mode
  recovery on non-startable tokens; 26 new tests (62 total)
- main.rs: REPL now parses statements instead of bare expressions
This commit is contained in:
2026-03-10 17:55:22 +01:00
parent becc7a2d34
commit d556a54541
3 changed files with 645 additions and 4 deletions

View File

@@ -35,6 +35,38 @@ pub enum BinaryOp {
Rem, // `%`
}
// ── Types ──────────────────────────────────────────────────────────────────────
/// A type annotation as it appears in source.
#[derive(Debug, Clone)]
pub enum Type {
    // ── Unsigned integers ──
    U8,
    U16,
    U32,
    U64,

    // ── Signed integers ──
    I8,
    I16,
    I32,
    I64,

    // ── Floating-point ──
    F32,
    F64,

    // ── Other primitives ──
    Bool,
    Char,

    /// User-defined named type (e.g. a struct): the name and its span.
    Named(String, Span),
    /// Typed pointer: `*type`.
    Pointer(Box<Type>),
    /// Opaque (untyped) pointer: `*opaque`.
    OpaquePointer,
    /// Fixed-size array: `[type; INT_LIT]`. The size is held as a string.
    Array { elem: Box<Type>, size: String },
    /// Placeholder produced during error recovery.
    Error,
}
// ── Struct literal field ───────────────────────────────────────────────────────
#[derive(Debug, Clone)]
@@ -111,3 +143,61 @@ pub enum ExprKind {
// Placeholder for parse errors — allows parsing to continue
Error,
}
// ── Block ──────────────────────────────────────────────────────────────────────
/// A brace-delimited sequence of statements.
#[derive(Debug, Clone)]
pub struct Block {
    /// Statements in source order.
    pub stmts: Vec<Stmt>,
    /// Source span of the block.
    pub span: Span,
}
// ── Else branch ───────────────────────────────────────────────────────────────
/// What follows the `else` keyword of an `if` statement.
#[derive(Debug, Clone)]
pub enum ElseBranch {
    /// `else if …` — the nested `if` statement, boxed because the type is recursive.
    If(Box<Stmt>),
    /// `else { … }`
    Block(Block),
}
// ── Statement ─────────────────────────────────────────────────────────────────
/// A statement node: its kind plus the source span it covers.
#[derive(Debug, Clone)]
pub struct Stmt {
    /// Which statement this is, with its payload.
    pub kind: StmtKind,
    /// Source span of the statement.
    pub span: Span,
}
/// The different statement forms.
#[derive(Debug, Clone)]
pub enum StmtKind {
    /// `let [mut] name [: type] [= expr] ;`
    Let {
        /// `true` when the `mut` keyword was present.
        mutable: bool,
        /// Name of the binding.
        name: String,
        /// Span of the name.
        name_span: Span,
        /// Optional type annotation.
        ty: Option<Type>,
        /// Optional initializer expression.
        init: Option<Expr>,
    },

    /// `return [expr] ;`
    Return(Option<Expr>),

    /// `if expr_ns block [else else_branch]`
    If {
        cond: Expr,
        then_block: Block,
        else_branch: Option<ElseBranch>,
    },

    /// `while expr_ns block`
    While { cond: Expr, body: Block },

    /// `loop block`
    Loop { body: Block },

    /// `break ;`
    Break,

    /// `continue ;`
    Continue,

    /// `{ stmts }`
    Block(Block),

    /// `expr ;`
    Expr(Expr),

    /// Error placeholder — emitted during recovery so the parent can continue.
    Error,
}

View File

@@ -33,14 +33,14 @@ fn main() {
}
let mut parser = Parser::new(src);
let expr = parser.parse_expr(true);
let node = parser.parse_stmt();
for err in &parser.errors {
eprintln!("parse error: {err}");
}
if parser.errors.is_empty() {
println!("{expr:#?}");
println!("{node:#?}");
}
}
}

View File

@@ -1,7 +1,9 @@
use std::fmt;
use crate::{
ast::{BinaryOp, Expr, ExprKind, StructField, UnaryOp},
ast::{
BinaryOp, Block, ElseBranch, Expr, ExprKind, Stmt, StmtKind, StructField, Type, UnaryOp,
},
lexer::Lexer,
token::{Span, Token, TokenKind},
};
@@ -133,7 +135,7 @@ impl<'src> Parser<'src> {
/// Consume the current token if it matches `kind`; otherwise record an
/// error and return a zero-width dummy token at the current position
/// so that parsing can continue.
/// so that parsing can continue (missing-token insertion).
fn expect(&mut self, kind: TokenKind) -> Token<'src> {
let tok = self.current();
if tok.kind == kind {
@@ -148,8 +150,176 @@ impl<'src> Parser<'src> {
}
}
/// Panic-mode recovery: discard tokens up to the next statement boundary so
/// that the statements which follow can be parsed cleanly.
///
/// The scan halts *in front of* end-of-input, `}` and any keyword that
/// begins a statement (leaving that token for the caller), and halts
/// *just past* a `;` (which is consumed).
fn synchronize(&mut self) {
    while !matches!(
        self.current().kind,
        TokenKind::Eof
            | TokenKind::RCurly
            | TokenKind::Let
            | TokenKind::Return
            | TokenKind::If
            | TokenKind::While
            | TokenKind::Loop
            | TokenKind::Break
            | TokenKind::Continue
    ) {
        // A `;` terminates the broken statement: swallow it and stop.
        let at_terminator = self.current().kind == TokenKind::Semicolon;
        self.advance();
        if at_terminator {
            break;
        }
    }
}
// ── Public API ────────────────────────────────────────────────────────────
/// Parse a type annotation.
pub fn parse_type(&mut self) -> Type {
let tok = self.advance();
match tok.kind {
// Primitive types
TokenKind::U8 => Type::U8,
TokenKind::U16 => Type::U16,
TokenKind::U32 => Type::U32,
TokenKind::U64 => Type::U64,
TokenKind::I8 => Type::I8,
TokenKind::I16 => Type::I16,
TokenKind::I32 => Type::I32,
TokenKind::I64 => Type::I64,
TokenKind::F32 => Type::F32,
TokenKind::F64 => Type::F64,
TokenKind::Bool => Type::Bool,
TokenKind::Char => Type::Char,
// Named type (user-defined struct, etc.)
TokenKind::Ident => Type::Named(tok.text.to_owned(), tok.span),
// Pointer: `*opaque` or `*<type>`
TokenKind::Star => {
if self.current().kind == TokenKind::Opaque {
self.advance();
Type::OpaquePointer
} else {
Type::Pointer(Box::new(self.parse_type()))
}
}
// Array: `[type; INT_LIT]`
TokenKind::LBracket => {
let elem = self.parse_type();
self.expect(TokenKind::Semicolon);
let size_tok = self.expect(TokenKind::IntLit);
self.expect(TokenKind::RBracket);
Type::Array {
elem: Box::new(elem),
size: size_tok.text.to_owned(),
}
}
// Error — insert recovery placeholder
_ => {
self.errors.push(ParseError {
span: tok.span,
message: format!("expected type, found {}", tok.kind),
});
Type::Error
}
}
}
/// Parse a block: `{ stmt* }`.
///
/// Statements are collected until a `}` or end-of-input is seen. Both
/// braces go through `expect()`, and the block's span runs from the opening
/// token to the closing one.
pub fn parse_block(&mut self) -> Block {
    let lbrace = self.expect(TokenKind::LCurly);

    let mut body = Vec::new();
    while !matches!(self.current().kind, TokenKind::RCurly | TokenKind::Eof) {
        body.push(self.parse_stmt());
    }

    let rbrace = self.expect(TokenKind::RCurly);
    Block {
        stmts: body,
        span: lbrace.span.cover(rbrace.span),
    }
}
/// Parse a single statement.
///
/// Dispatches on the current token: statement keywords and `{` go to their
/// dedicated parsers, and anything not listed is parsed as an expression
/// statement.
///
/// **Recovery policy**
/// - *Missing-token insertion*: `expect()` handles single missing tokens
///   (e.g. a forgotten `;`) by inserting a zero-width dummy — no tokens
///   are skipped and no error cascades.
/// - *Synchronization*: tokens that can never start a statement or
///   expression trigger `synchronize()`, which skips forward until the
///   next statement boundary to prevent cascading errors.
/// - *Forward progress*: a stray `}` is a stop token for `synchronize()`,
///   so it is consumed here directly. Without that, a caller looping on
///   `parse_stmt()` would report the same `}` forever.
pub fn parse_stmt(&mut self) -> Stmt {
    let tok = self.current();
    match tok.kind {
        TokenKind::Let => self.parse_let_stmt(),
        TokenKind::Return => self.parse_return_stmt(),
        TokenKind::If => self.parse_if_stmt(),
        TokenKind::While => self.parse_while_stmt(),
        TokenKind::Loop => self.parse_loop_stmt(),
        TokenKind::Break => {
            let kw = self.advance();
            let semi = self.expect(TokenKind::Semicolon);
            Stmt {
                kind: StmtKind::Break,
                span: kw.span.cover(semi.span),
            }
        }
        TokenKind::Continue => {
            let kw = self.advance();
            let semi = self.expect(TokenKind::Semicolon);
            Stmt {
                kind: StmtKind::Continue,
                span: kw.span.cover(semi.span),
            }
        }
        TokenKind::LCurly => {
            let block = self.parse_block();
            let span = block.span;
            Stmt {
                kind: StmtKind::Block(block),
                span,
            }
        }
        // Tokens that cannot start any statement or expression.
        // Synchronize to prevent cascading errors.
        TokenKind::RCurly
        | TokenKind::RParen
        | TokenKind::RBracket
        | TokenKind::Else
        | TokenKind::Comma
        | TokenKind::Arrow
        | TokenKind::Fn
        | TokenKind::Struct
        | TokenKind::Eof => {
            self.errors.push(ParseError {
                span: tok.span,
                message: format!("unexpected {} in statement position", tok.kind),
            });
            if tok.kind == TokenKind::RCurly {
                // `synchronize()` would stop in front of this `}` and consume
                // nothing; drop the reported brace so the parser advances.
                // (`parse_block` never hands us its own closing brace.)
                self.advance();
            } else {
                self.synchronize();
            }
            Stmt {
                kind: StmtKind::Error,
                span: tok.span,
            }
        }
        // Anything else is an expression statement.
        _ => self.parse_expr_stmt(),
    }
}
/// Parse a single expression.
///
/// `allow_struct_literals` controls whether a bare `Ident { … }` is
@@ -159,6 +329,119 @@ impl<'src> Parser<'src> {
self.pratt(0, allow_struct_literals)
}
// ── Statement helpers ─────────────────────────────────────────────────────
/// Parse `let [mut] name [: type] [= expr] ;`.
///
/// The current token must be `let`. The name and the trailing `;` go
/// through `expect()`, so a missing one is reported and replaced by a
/// dummy token rather than aborting the statement.
fn parse_let_stmt(&mut self) -> Stmt {
    let let_kw = self.advance(); // consume `let`

    let mutable = self.current().kind == TokenKind::Mut;
    if mutable {
        self.advance(); // consume `mut`
    }

    let name_tok = self.expect(TokenKind::Ident);

    // Optional `: type`
    let ty = (self.current().kind == TokenKind::Colon).then(|| {
        self.advance();
        self.parse_type()
    });

    // Optional `= expr` (struct literals allowed)
    let init = (self.current().kind == TokenKind::Eq).then(|| {
        self.advance();
        self.parse_expr(true)
    });

    let semi = self.expect(TokenKind::Semicolon);

    Stmt {
        kind: StmtKind::Let {
            mutable,
            name: name_tok.text.to_owned(),
            name_span: name_tok.span,
            ty,
            init,
        },
        span: let_kw.span.cover(semi.span),
    }
}
/// Parse `return [expr] ;`.
///
/// The current token must be `return`. One token of lookahead decides
/// whether a value follows:
/// - `;` means a plain `return;`.
/// - `}` or end-of-input means the `;` was forgotten. No expression is
///   attempted (these tokens cannot start one), so the only error recorded
///   is the missing `;` from `expect()`, and the `}` stays in place for the
///   enclosing block.
/// - Anything else is parsed as the returned expression.
fn parse_return_stmt(&mut self) -> Stmt {
    let kw = self.advance(); // consume `return`
    let has_value = !matches!(
        self.current().kind,
        TokenKind::Semicolon | TokenKind::RCurly | TokenKind::Eof
    );
    let value = if has_value {
        Some(self.parse_expr(true))
    } else {
        None
    };
    let semi = self.expect(TokenKind::Semicolon);
    Stmt {
        kind: StmtKind::Return(value),
        span: kw.span.cover(semi.span),
    }
}
/// Parse `if expr_ns block [else (if … | block)]`.
///
/// The current token must be `if`. The condition is parsed with struct
/// literals disabled. The statement's span runs from the `if` keyword to
/// the end of the last branch present.
fn parse_if_stmt(&mut self) -> Stmt {
    let if_kw = self.advance(); // consume `if`
    let cond = self.parse_expr(false);
    let then_block = self.parse_block();

    // Without an `else`, the statement ends with the then-block.
    let mut end_span = then_block.span;
    let mut else_branch = None;

    if self.current().kind == TokenKind::Else {
        self.advance(); // consume `else`
        let branch = if self.current().kind == TokenKind::If {
            ElseBranch::If(Box::new(self.parse_if_stmt()))
        } else {
            ElseBranch::Block(self.parse_block())
        };
        end_span = match &branch {
            ElseBranch::If(nested) => nested.span,
            ElseBranch::Block(block) => block.span,
        };
        else_branch = Some(branch);
    }

    Stmt {
        kind: StmtKind::If {
            cond,
            then_block,
            else_branch,
        },
        span: if_kw.span.cover(end_span),
    }
}
/// Parse `while expr_ns block`.
///
/// The current token must be `while`. The condition is parsed with struct
/// literals disabled.
fn parse_while_stmt(&mut self) -> Stmt {
    let while_kw = self.advance(); // consume `while`
    let cond = self.parse_expr(false);
    let body = self.parse_block();
    Stmt {
        // `span` is computed first, before `body` is moved into `kind`.
        span: while_kw.span.cover(body.span),
        kind: StmtKind::While { cond, body },
    }
}
/// Parse `loop block`. The current token must be `loop`.
fn parse_loop_stmt(&mut self) -> Stmt {
    let loop_kw = self.advance(); // consume `loop`
    let body = self.parse_block();
    Stmt {
        // `span` is computed first, before `body` is moved into `kind`.
        span: loop_kw.span.cover(body.span),
        kind: StmtKind::Loop { body },
    }
}
/// Parse `expr ;`, with struct literals allowed in the expression.
fn parse_expr_stmt(&mut self) -> Stmt {
    let expr = self.parse_expr(true);
    let terminator = self.expect(TokenKind::Semicolon);
    Stmt {
        // `span` is computed first, before `expr` is moved into `kind`.
        span: expr.span.cover(terminator.span),
        kind: StmtKind::Expr(expr),
    }
}
// ── Pratt core ────────────────────────────────────────────────────────────
fn pratt(&mut self, min_bp: u8, allow_struct_lit: bool) -> Expr {
@@ -400,6 +683,9 @@ impl<'src> Parser<'src> {
#[cfg(test)]
mod tests {
use super::*;
use crate::ast::{ElseBranch, ExprKind, StmtKind, Type};
// ── Expression test helpers ───────────────────────────────────────────────
fn parse(src: &str) -> Expr {
Parser::new(src).parse_expr(true)
@@ -409,6 +695,18 @@ mod tests {
Parser::new(src).parse_expr(false)
}
// ── Statement test helpers ────────────────────────────────────────────────
/// Parse the first statement of `src` with a fresh parser. Recorded errors
/// are dropped along with the parser.
fn stmt(src: &str) -> Stmt {
    let mut parser = Parser::new(src);
    parser.parse_stmt()
}
/// Parse `src` as a type annotation with a fresh parser. Recorded errors
/// are dropped along with the parser.
fn parse_type_str(src: &str) -> Type {
    let mut parser = Parser::new(src);
    parser.parse_type()
}
// ── Expression tests ──────────────────────────────────────────────────────
#[test]
fn int_literal() {
let expr = parse("42");
@@ -614,4 +912,257 @@ mod tests {
}
));
}
// ── Type tests ────────────────────────────────────────────────────────────
#[test]
fn type_primitives() {
    // Each primitive keyword must map to its like-named `Type` variant.
    macro_rules! check {
        ($src:literal => $variant:pat) => {
            assert!(matches!(parse_type_str($src), $variant))
        };
    }

    check!("u8" => Type::U8);
    check!("u16" => Type::U16);
    check!("u32" => Type::U32);
    check!("u64" => Type::U64);
    check!("i8" => Type::I8);
    check!("i16" => Type::I16);
    check!("i32" => Type::I32);
    check!("i64" => Type::I64);
    check!("f32" => Type::F32);
    check!("f64" => Type::F64);
    check!("bool" => Type::Bool);
    check!("char" => Type::Char);
}
#[test]
fn type_named() {
    // An identifier in type position becomes `Type::Named` carrying its text.
    let ty = parse_type_str("Foo");
    let is_foo = match &ty {
        Type::Named(name, _) => name == "Foo",
        _ => false,
    };
    assert!(is_foo);
}
#[test]
fn type_pointer() {
    // `*<type>` yields a typed pointer.
    let ty = parse_type_str("*i32");
    assert!(matches!(ty, Type::Pointer(_)));
}
#[test]
fn type_opaque_pointer() {
    // `*opaque` is its own variant, not a `Pointer` to something.
    let ty = parse_type_str("*opaque");
    assert!(matches!(ty, Type::OpaquePointer));
}
#[test]
fn type_array() {
    // `[type; N]` keeps the size as the literal's text.
    let ty = parse_type_str("[i32; 10]");
    let size_is_ten = match &ty {
        Type::Array { size, .. } => size == "10",
        _ => false,
    };
    assert!(size_is_ten);
}
#[test]
fn type_nested_pointer() {
    // `**i32` → Pointer(Pointer(I32)). Check every layer, not just the
    // outermost one, so a parser that drops a `*` is caught.
    match parse_type_str("**i32") {
        Type::Pointer(outer) => match *outer {
            Type::Pointer(inner) => assert!(matches!(*inner, Type::I32)),
            other => panic!("expected inner pointer, got {other:?}"),
        },
        other => panic!("expected outer pointer, got {other:?}"),
    }
}
// ── Statement tests ───────────────────────────────────────────────────────
#[test]
fn let_basic() {
    // Plain `let` with an initializer: immutable, untyped, init is `5`.
    let parsed = stmt("let x = 5;");
    if let StmtKind::Let {
        mutable,
        name,
        ty,
        init,
        ..
    } = &parsed.kind
    {
        assert!(!mutable);
        assert_eq!(name, "x");
        assert!(ty.is_none());
        let init_expr = init.as_ref().unwrap();
        assert!(matches!(
            init_expr.kind,
            ExprKind::IntLit(ref v) if v == "5"
        ));
    } else {
        panic!("expected let");
    }
}
#[test]
fn let_mut() {
    // The `mut` keyword sets the `mutable` flag.
    let parsed = stmt("let mut x = 5;");
    assert!(matches!(parsed.kind, StmtKind::Let { mutable: true, .. }));
}
#[test]
fn let_with_type() {
    // A `: type` annotation is recorded in `ty`.
    let parsed = stmt("let x: i32 = 0;");
    let has_i32_annotation = matches!(
        parsed.kind,
        StmtKind::Let {
            ty: Some(Type::I32),
            ..
        }
    );
    assert!(has_i32_annotation);
}
#[test]
fn let_type_only() {
    // A declaration with a type but no initializer.
    let parsed = stmt("let x: bool;");
    let typed_without_init = matches!(
        parsed.kind,
        StmtKind::Let {
            ty: Some(Type::Bool),
            init: None,
            ..
        }
    );
    assert!(typed_without_init);
}
#[test]
fn return_unit() {
    // `return;` carries no value.
    let parsed = stmt("return;");
    assert!(matches!(parsed.kind, StmtKind::Return(None)));
}
#[test]
fn return_value() {
    // `return <expr>;` carries the expression.
    let parsed = stmt("return x + 1;");
    assert!(matches!(parsed.kind, StmtKind::Return(Some(_))));
}
#[test]
fn if_no_else() {
    // An `if` without `else` has no else-branch.
    let parsed = stmt("if x < 10 { foo(); }");
    let lacks_else = matches!(
        parsed.kind,
        StmtKind::If {
            else_branch: None,
            ..
        }
    );
    assert!(lacks_else);
}
#[test]
fn if_else() {
    // `else { … }` is stored as a block branch.
    let parsed = stmt("if x { a(); } else { b(); }");
    let has_else_block = matches!(
        parsed.kind,
        StmtKind::If {
            else_branch: Some(ElseBranch::Block(_)),
            ..
        }
    );
    assert!(has_else_block);
}
#[test]
fn if_else_if() {
    // `else if …` is stored as a nested `if` branch.
    let parsed = stmt("if a { } else if b { }");
    let has_else_if = matches!(
        parsed.kind,
        StmtKind::If {
            else_branch: Some(ElseBranch::If(_)),
            ..
        }
    );
    assert!(has_else_if);
}
#[test]
fn while_stmt() {
    // A `while` loop with a comparison condition and a one-statement body.
    let parsed = stmt("while n > 0 { n = n - 1; }");
    assert!(matches!(parsed.kind, StmtKind::While { .. }));
}
#[test]
fn loop_stmt() {
    // An unconditional `loop` whose body is a `break`.
    let parsed = stmt("loop { break; }");
    assert!(matches!(parsed.kind, StmtKind::Loop { .. }));
}
#[test]
fn break_stmt() {
    // `break;` on its own.
    let parsed = stmt("break;");
    assert!(matches!(parsed.kind, StmtKind::Break));
}
#[test]
fn continue_stmt() {
    // `continue;` on its own.
    let parsed = stmt("continue;");
    assert!(matches!(parsed.kind, StmtKind::Continue));
}
#[test]
fn block_stmt() {
    // A bare `{ … }` in statement position is a block statement.
    let parsed = stmt("{ let x = 1; }");
    assert!(matches!(parsed.kind, StmtKind::Block(_)));
}
#[test]
fn expr_stmt_call() {
    // A call followed by `;` is an expression statement wrapping a call.
    let parsed = stmt("foo(1, 2);");
    if let StmtKind::Expr(inner) = &parsed.kind {
        assert!(matches!(inner.kind, ExprKind::Call { .. }));
    } else {
        panic!("expected expr stmt");
    }
}
#[test]
fn nested_blocks() {
    // A block containing another block: the outer one holds exactly one
    // statement, and that statement is itself a block.
    let parsed = stmt("{ { let x = 1; } }");
    if let StmtKind::Block(outer) = &parsed.kind {
        assert_eq!(outer.stmts.len(), 1);
        let only_child = &outer.stmts[0];
        assert!(matches!(only_child.kind, StmtKind::Block(_)));
    } else {
        panic!("expected block");
    }
}
// ── Recovery tests ────────────────────────────────────────────────────────
#[test]
fn missing_semicolon_records_error() {
    // `let x = 5` with no `;` should record exactly one error: the missing
    // token is inserted by `expect()`, so nothing else should be reported.
    let mut p = Parser::new("let x = 5");
    p.parse_stmt();
    assert_eq!(
        p.errors.len(),
        1,
        "expected exactly one error for the missing `;`"
    );
}
#[test]
fn stray_token_synchronizes_to_next_stmt() {
    // A leading `,` can never begin a statement. The parser must report it,
    // skip ahead, and leave the `let` that follows intact.
    let mut parser = Parser::new(", let x = 1;");
    let first = parser.parse_stmt();
    let second = parser.parse_stmt();

    let first_is_error = matches!(first.kind, StmtKind::Error);
    let second_is_let = matches!(second.kind, StmtKind::Let { .. });

    assert!(first_is_error, "first stmt should be Error");
    assert!(second_is_let, "second stmt should be Let");
}
#[test]
fn missing_let_name_inserts_dummy() {
    // `let = 5;` has no binding name. `expect()` reports that and supplies
    // a dummy token, so the parser neither panics nor gives up on the
    // statement.
    let mut parser = Parser::new("let = 5;");
    let parsed = parser.parse_stmt();

    let recorded_error = !parser.errors.is_empty();
    assert!(recorded_error);
    // Despite the error, the result is still a `Let` node.
    assert!(matches!(parsed.kind, StmtKind::Let { .. }));
}
#[test]
fn if_condition_no_struct_literal() {
    // Struct literals are disabled in an `if` condition, so in `if Foo { }`
    // the condition is just `Foo` and `{ }` is the (empty) then-block.
    // Had `Foo { }` been taken as a struct literal, no block would be left
    // and an error would be recorded — so a clean parse is what verifies
    // the rule. The statement kind alone cannot: `parse_if_stmt` always
    // yields `If`.
    let mut p = Parser::new("if Foo { }");
    let s = p.parse_stmt();
    assert!(
        p.errors.is_empty(),
        "condition must not be parsed as a struct literal"
    );
    match &s.kind {
        StmtKind::If {
            then_block,
            else_branch,
            ..
        } => {
            assert!(then_block.stmts.is_empty());
            assert!(else_branch.is_none());
        }
        _ => panic!("expected if"),
    }
}
}