diff --git a/fluxc/src/ast.rs b/fluxc/src/ast.rs
index f790bd3..d5a7f7a 100644
--- a/fluxc/src/ast.rs
+++ b/fluxc/src/ast.rs
@@ -35,6 +35,38 @@ pub enum BinaryOp {
     Rem, // `%`
 }
 
+// ── Types ──────────────────────────────────────────────────────────────────────
+
+#[derive(Debug, Clone)]
+pub enum Type {
+    // Unsigned integers
+    U8,
+    U16,
+    U32,
+    U64,
+    // Signed integers
+    I8,
+    I16,
+    I32,
+    I64,
+    // Floating-point
+    F32,
+    F64,
+    // Other primitives
+    Bool,
+    Char,
+    // User-defined named type (e.g. a struct)
+    Named(String, Span),
+    // Typed pointer: `*type`
+    Pointer(Box<Type>),
+    // Opaque (untyped) pointer: `*opaque`
+    OpaquePointer,
+    // Fixed-size array: `[type; INT_LIT]`
+    Array { elem: Box<Type>, size: String },
+    // Error placeholder for recovery
+    Error,
+}
+
 // ── Struct literal field ───────────────────────────────────────────────────────
 
 #[derive(Debug, Clone)]
@@ -111,3 +143,61 @@ pub enum ExprKind {
     // Placeholder for parse errors — allows parsing to continue
     Error,
 }
+
+// ── Block ──────────────────────────────────────────────────────────────────────
+
+#[derive(Debug, Clone)]
+pub struct Block {
+    pub stmts: Vec<Stmt>,
+    pub span: Span,
+}
+
+// ── Else branch ───────────────────────────────────────────────────────────────
+
+#[derive(Debug, Clone)]
+pub enum ElseBranch {
+    If(Box<Stmt>), // `else if …`
+    Block(Block), // `else { … }`
+}
+
+// ── Statement ─────────────────────────────────────────────────────────────────
+
+#[derive(Debug, Clone)]
+pub struct Stmt {
+    pub kind: StmtKind,
+    pub span: Span,
+}
+
+#[derive(Debug, Clone)]
+pub enum StmtKind {
+    /// `let [mut] name [: type] [= expr] ;`
+    Let {
+        mutable: bool,
+        name: String,
+        name_span: Span,
+        ty: Option<Type>,
+        init: Option<Expr>,
+    },
+    /// `return [expr] ;`
+    Return(Option<Expr>),
+    /// `if expr_ns block [else else_branch]`
+    If {
+        cond: Expr,
+        then_block: Block,
+        else_branch: Option<ElseBranch>,
+    },
+    /// `while expr_ns block`
+    While { cond: Expr, body: Block },
+    /// `loop block`
+    Loop { body: Block },
+    /// `break ;`
+    Break,
+    /// `continue ;`
+    Continue,
+    /// `{ stmts }`
+    Block(Block),
+    /// `expr ;`
+    Expr(Expr),
+    /// Error placeholder — emitted during recovery so the parent can continue.
+    Error,
+}
diff --git a/fluxc/src/main.rs b/fluxc/src/main.rs
index fd73427..cae111d 100644
--- a/fluxc/src/main.rs
+++ b/fluxc/src/main.rs
@@ -33,14 +33,14 @@ fn main() {
         }
 
         let mut parser = Parser::new(src);
-        let expr = parser.parse_expr(true);
+        let node = parser.parse_stmt();
 
         for err in &parser.errors {
             eprintln!("parse error: {err}");
         }
 
         if parser.errors.is_empty() {
-            println!("{expr:#?}");
+            println!("{node:#?}");
         }
     }
 }
diff --git a/fluxc/src/parser.rs b/fluxc/src/parser.rs
index 7346ec5..5de1f44 100644
--- a/fluxc/src/parser.rs
+++ b/fluxc/src/parser.rs
@@ -1,7 +1,9 @@
 use std::fmt;
 
 use crate::{
-    ast::{BinaryOp, Expr, ExprKind, StructField, UnaryOp},
+    ast::{
+        BinaryOp, Block, ElseBranch, Expr, ExprKind, Stmt, StmtKind, StructField, Type, UnaryOp,
+    },
     lexer::Lexer,
     token::{Span, Token, TokenKind},
 };
@@ -133,7 +135,7 @@ impl<'src> Parser<'src> {
 
     /// Consume the current token if it matches `kind`; otherwise record an
     /// error and return a zero-width dummy token at the current position
-    /// so that parsing can continue.
+    /// so that parsing can continue (missing-token insertion).
     fn expect(&mut self, kind: TokenKind) -> Token<'src> {
         let tok = self.current();
         if tok.kind == kind {
@@ -148,8 +150,184 @@ impl<'src> Parser<'src> {
             }
         }
     }
+    /// Skip tokens until we reach a natural statement boundary, so that
+    /// subsequent statements can still be parsed cleanly.
+    ///
+    /// Stops *before* statement-starting keywords and `}` (so the caller can
+    /// handle them), and stops *after* consuming a `;`.
+    fn synchronize(&mut self) {
+        loop {
+            match self.current().kind {
+                // Stop before these — they begin the next statement or close a block.
+                TokenKind::Eof
+                | TokenKind::RCurly
+                | TokenKind::Let
+                | TokenKind::Return
+                | TokenKind::If
+                | TokenKind::While
+                | TokenKind::Loop
+                | TokenKind::Break
+                | TokenKind::Continue => break,
+                // Consume the `;` and stop — it terminates the current statement.
+                TokenKind::Semicolon => {
+                    self.advance();
+                    break;
+                }
+                _ => {
+                    self.advance();
+                }
+            }
+        }
+    }
+
     // ── Public API ────────────────────────────────────────────────────────────
 
+    /// Parse a type annotation.
+    pub fn parse_type(&mut self) -> Type {
+        let tok = self.advance();
+        match tok.kind {
+            // Primitive types
+            TokenKind::U8 => Type::U8,
+            TokenKind::U16 => Type::U16,
+            TokenKind::U32 => Type::U32,
+            TokenKind::U64 => Type::U64,
+            TokenKind::I8 => Type::I8,
+            TokenKind::I16 => Type::I16,
+            TokenKind::I32 => Type::I32,
+            TokenKind::I64 => Type::I64,
+            TokenKind::F32 => Type::F32,
+            TokenKind::F64 => Type::F64,
+            TokenKind::Bool => Type::Bool,
+            TokenKind::Char => Type::Char,
+
+            // Named type (user-defined struct, etc.)
+            TokenKind::Ident => Type::Named(tok.text.to_owned(), tok.span),
+
+            // Pointer: `*opaque` or `*type`
+            TokenKind::Star => {
+                if self.current().kind == TokenKind::Opaque {
+                    self.advance();
+                    Type::OpaquePointer
+                } else {
+                    Type::Pointer(Box::new(self.parse_type()))
+                }
+            }
+
+            // Array: `[type; INT_LIT]`
+            TokenKind::LBracket => {
+                let elem = self.parse_type();
+                self.expect(TokenKind::Semicolon);
+                let size_tok = self.expect(TokenKind::IntLit);
+                self.expect(TokenKind::RBracket);
+                Type::Array {
+                    elem: Box::new(elem),
+                    size: size_tok.text.to_owned(),
+                }
+            }
+
+            // Error — insert recovery placeholder
+            _ => {
+                self.errors.push(ParseError {
+                    span: tok.span,
+                    message: format!("expected type, found {}", tok.kind),
+                });
+                Type::Error
+            }
+        }
+    }
+
+    /// Parse a block: `{ stmt* }`.
+    pub fn parse_block(&mut self) -> Block {
+        let open = self.expect(TokenKind::LCurly);
+        let mut stmts = Vec::new();
+        loop {
+            if matches!(self.current().kind, TokenKind::RCurly | TokenKind::Eof) {
+                break;
+            }
+            stmts.push(self.parse_stmt());
+        }
+        let close = self.expect(TokenKind::RCurly);
+        Block {
+            stmts,
+            span: open.span.cover(close.span),
+        }
+    }
+
+    /// Parse a single statement.
+    ///
+    /// **Recovery policy**
+    /// - *Missing-token insertion*: `expect()` handles single missing tokens
+    ///   (e.g. a forgotten `;`) by inserting a zero-width dummy — no tokens
+    ///   are skipped and no error cascades.
+    /// - *Synchronization*: tokens that can never start a statement or
+    ///   expression trigger `synchronize()`, which skips forward until the
+    ///   next statement boundary to prevent cascading errors. The offending
+    ///   token itself is always consumed (unless it is end-of-input), so
+    ///   every call that reports this error makes progress.
+    pub fn parse_stmt(&mut self) -> Stmt {
+        let tok = self.current();
+        match tok.kind {
+            TokenKind::Let => self.parse_let_stmt(),
+            TokenKind::Return => self.parse_return_stmt(),
+            TokenKind::If => self.parse_if_stmt(),
+            TokenKind::While => self.parse_while_stmt(),
+            TokenKind::Loop => self.parse_loop_stmt(),
+            TokenKind::Break => {
+                let kw = self.advance();
+                let semi = self.expect(TokenKind::Semicolon);
+                Stmt {
+                    kind: StmtKind::Break,
+                    span: kw.span.cover(semi.span),
+                }
+            }
+            TokenKind::Continue => {
+                let kw = self.advance();
+                let semi = self.expect(TokenKind::Semicolon);
+                Stmt {
+                    kind: StmtKind::Continue,
+                    span: kw.span.cover(semi.span),
+                }
+            }
+            TokenKind::LCurly => {
+                let block = self.parse_block();
+                let span = block.span;
+                Stmt {
+                    kind: StmtKind::Block(block),
+                    span,
+                }
+            }
+            // Tokens that cannot start any statement or expression.
+            // Synchronize to prevent cascading errors.
+            TokenKind::RCurly
+            | TokenKind::RParen
+            | TokenKind::RBracket
+            | TokenKind::Else
+            | TokenKind::Comma
+            | TokenKind::Arrow
+            | TokenKind::Fn
+            | TokenKind::Struct
+            | TokenKind::Eof => {
+                self.errors.push(ParseError {
+                    span: tok.span,
+                    message: format!("unexpected {} in statement position", tok.kind),
+                });
+                // `synchronize()` stops *before* `}` without consuming it, so
+                // skip the offending token here; otherwise a caller looping on
+                // `parse_stmt` would never get past a stray `}`.
+                if tok.kind != TokenKind::Eof {
+                    self.advance();
+                }
+                self.synchronize();
+                Stmt {
+                    kind: StmtKind::Error,
+                    span: tok.span,
+                }
+            }
+            // Anything else is an expression statement.
+            _ => self.parse_expr_stmt(),
+        }
+    }
+
     /// Parse a single expression.
     ///
     /// `allow_struct_literals` controls whether a bare `Ident { … }` is
@@ -159,6 +337,125 @@ impl<'src> Parser<'src> {
         self.pratt(0, allow_struct_literals)
     }
 
+    // ── Statement helpers ─────────────────────────────────────────────────────
+
+    /// Parse `let [mut] name [: type] [= expr] ;` (current token is `let`).
+    fn parse_let_stmt(&mut self) -> Stmt {
+        let start = self.advance(); // consume `let`
+        let mutable = if self.current().kind == TokenKind::Mut {
+            self.advance();
+            true
+        } else {
+            false
+        };
+        let name_tok = self.expect(TokenKind::Ident);
+        let ty = if self.current().kind == TokenKind::Colon {
+            self.advance();
+            Some(self.parse_type())
+        } else {
+            None
+        };
+        let init = if self.current().kind == TokenKind::Eq {
+            self.advance();
+            Some(self.parse_expr(true))
+        } else {
+            None
+        };
+        let semi = self.expect(TokenKind::Semicolon);
+        Stmt {
+            kind: StmtKind::Let {
+                mutable,
+                name: name_tok.text.to_owned(),
+                name_span: name_tok.span,
+                ty,
+                init,
+            },
+            span: start.span.cover(semi.span),
+        }
+    }
+
+    /// Parse `return [expr] ;` (current token is `return`).
+    fn parse_return_stmt(&mut self) -> Stmt {
+        let kw = self.advance(); // consume `return`
+        // LL(1): `;` → unit return; anything else → parse expression
+        let value = if self.current().kind != TokenKind::Semicolon {
+            Some(self.parse_expr(true))
+        } else {
+            None
+        };
+        let semi = self.expect(TokenKind::Semicolon);
+        Stmt {
+            kind: StmtKind::Return(value),
+            span: kw.span.cover(semi.span),
+        }
+    }
+
+    /// Parse `if cond block [else (if … | block)]` (current token is `if`).
+    fn parse_if_stmt(&mut self) -> Stmt {
+        let kw = self.advance(); // consume `if`
+        // Condition: expr_ns (no struct literals at outermost level)
+        let cond = self.parse_expr(false);
+        let then_block = self.parse_block();
+        let else_branch = if self.current().kind == TokenKind::Else {
+            self.advance(); // consume `else`
+            if self.current().kind == TokenKind::If {
+                let nested = self.parse_if_stmt();
+                Some(ElseBranch::If(Box::new(nested)))
+            } else {
+                Some(ElseBranch::Block(self.parse_block()))
+            }
+        } else {
+            None
+        };
+        let end_span = match &else_branch {
+            Some(ElseBranch::If(s)) => s.span,
+            Some(ElseBranch::Block(b)) => b.span,
+            None => then_block.span,
+        };
+        Stmt {
+            kind: StmtKind::If {
+                cond,
+                then_block,
+                else_branch,
+            },
+            span: kw.span.cover(end_span),
+        }
+    }
+
+    /// Parse `while cond block` (current token is `while`).
+    fn parse_while_stmt(&mut self) -> Stmt {
+        let kw = self.advance(); // consume `while`
+        let cond = self.parse_expr(false); // no struct literals in condition
+        let body = self.parse_block();
+        let span = kw.span.cover(body.span);
+        Stmt {
+            kind: StmtKind::While { cond, body },
+            span,
+        }
+    }
+
+    /// Parse `loop block` (current token is `loop`).
+    fn parse_loop_stmt(&mut self) -> Stmt {
+        let kw = self.advance(); // consume `loop`
+        let body = self.parse_block();
+        let span = kw.span.cover(body.span);
+        Stmt {
+            kind: StmtKind::Loop { body },
+            span,
+        }
+    }
+
+    /// Parse `expr ;` as an expression statement.
+    fn parse_expr_stmt(&mut self) -> Stmt {
+        let expr = self.parse_expr(true);
+        let semi = self.expect(TokenKind::Semicolon);
+        let span = expr.span.cover(semi.span);
+        Stmt {
+            kind: StmtKind::Expr(expr),
+            span,
+        }
+    }
+
     // ── Pratt core ────────────────────────────────────────────────────────────
 
     fn pratt(&mut self, min_bp: u8, allow_struct_lit: bool) -> Expr {
@@ -400,6 +697,9 @@ impl<'src> Parser<'src> {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::ast::{ElseBranch, ExprKind, StmtKind, Type};
+
+    // ── Expression test helpers ───────────────────────────────────────────────
 
     fn parse(src: &str) -> Expr {
         Parser::new(src).parse_expr(true)
@@ -409,6 +709,18 @@ mod tests {
         Parser::new(src).parse_expr(false)
     }
 
+    // ── Statement test helpers ────────────────────────────────────────────────
+
+    fn stmt(src: &str) -> Stmt {
+        Parser::new(src).parse_stmt()
+    }
+
+    fn parse_type_str(src: &str) -> Type {
+        Parser::new(src).parse_type()
+    }
+
+    // ── Expression tests ──────────────────────────────────────────────────────
+
     #[test]
     fn int_literal() {
         let expr = parse("42");
@@ -614,4 +926,268 @@ mod tests {
             }
         ));
     }
+
+    // ── Type tests ────────────────────────────────────────────────────────────
+
+    #[test]
+    fn type_primitives() {
+        assert!(matches!(parse_type_str("u8"), Type::U8));
+        assert!(matches!(parse_type_str("u16"), Type::U16));
+        assert!(matches!(parse_type_str("u32"), Type::U32));
+        assert!(matches!(parse_type_str("u64"), Type::U64));
+        assert!(matches!(parse_type_str("i8"), Type::I8));
+        assert!(matches!(parse_type_str("i16"), Type::I16));
+        assert!(matches!(parse_type_str("i32"), Type::I32));
+        assert!(matches!(parse_type_str("i64"), Type::I64));
+        assert!(matches!(parse_type_str("f32"), Type::F32));
+        assert!(matches!(parse_type_str("f64"), Type::F64));
+        assert!(matches!(parse_type_str("bool"), Type::Bool));
+        assert!(matches!(parse_type_str("char"), Type::Char));
+    }
+
+    #[test]
+    fn type_named() {
+        assert!(matches!(parse_type_str("Foo"), Type::Named(ref s, _) if s == "Foo"));
+    }
+
+    #[test]
+    fn type_pointer() {
+        assert!(matches!(parse_type_str("*i32"), Type::Pointer(_)));
+    }
+
+    #[test]
+    fn type_opaque_pointer() {
+        assert!(matches!(parse_type_str("*opaque"), Type::OpaquePointer));
+    }
+
+    #[test]
+    fn type_array() {
+        assert!(
+            matches!(parse_type_str("[i32; 10]"), Type::Array { ref size, .. } if size == "10")
+        );
+    }
+
+    #[test]
+    fn type_nested_pointer() {
+        // `**i32` → Pointer(Pointer(I32))
+        assert!(matches!(parse_type_str("**i32"), Type::Pointer(_)));
+    }
+
+    // ── Statement tests ───────────────────────────────────────────────────────
+
+    #[test]
+    fn let_basic() {
+        let s = stmt("let x = 5;");
+        match &s.kind {
+            StmtKind::Let {
+                mutable,
+                name,
+                ty,
+                init,
+                ..
+            } => {
+                assert!(!mutable);
+                assert_eq!(name, "x");
+                assert!(ty.is_none());
+                assert!(matches!(
+                    init.as_ref().unwrap().kind,
+                    ExprKind::IntLit(ref v) if v == "5"
+                ));
+            }
+            _ => panic!("expected let"),
+        }
+    }
+
+    #[test]
+    fn let_mut() {
+        assert!(matches!(
+            stmt("let mut x = 5;").kind,
+            StmtKind::Let { mutable: true, .. }
+        ));
+    }
+
+    #[test]
+    fn let_with_type() {
+        let s = stmt("let x: i32 = 0;");
+        assert!(matches!(
+            s.kind,
+            StmtKind::Let {
+                ty: Some(Type::I32),
+                ..
+            }
+        ));
+    }
+
+    #[test]
+    fn let_type_only() {
+        let s = stmt("let x: bool;");
+        assert!(matches!(
+            s.kind,
+            StmtKind::Let {
+                ty: Some(Type::Bool),
+                init: None,
+                ..
+            }
+        ));
+    }
+
+    #[test]
+    fn return_unit() {
+        assert!(matches!(stmt("return;").kind, StmtKind::Return(None)));
+    }
+
+    #[test]
+    fn return_value() {
+        assert!(matches!(
+            stmt("return x + 1;").kind,
+            StmtKind::Return(Some(_))
+        ));
+    }
+
+    #[test]
+    fn if_no_else() {
+        assert!(matches!(
+            stmt("if x < 10 { foo(); }").kind,
+            StmtKind::If {
+                else_branch: None,
+                ..
+            }
+        ));
+    }
+
+    #[test]
+    fn if_else() {
+        let s = stmt("if x { a(); } else { b(); }");
+        assert!(matches!(
+            s.kind,
+            StmtKind::If {
+                else_branch: Some(ElseBranch::Block(_)),
+                ..
+            }
+        ));
+    }
+
+    #[test]
+    fn if_else_if() {
+        let s = stmt("if a { } else if b { }");
+        assert!(matches!(
+            s.kind,
+            StmtKind::If {
+                else_branch: Some(ElseBranch::If(_)),
+                ..
+            }
+        ));
+    }
+
+    #[test]
+    fn while_stmt() {
+        assert!(matches!(
+            stmt("while n > 0 { n = n - 1; }").kind,
+            StmtKind::While { .. }
+        ));
+    }
+
+    #[test]
+    fn loop_stmt() {
+        assert!(matches!(
+            stmt("loop { break; }").kind,
+            StmtKind::Loop { .. }
+        ));
+    }
+
+    #[test]
+    fn break_stmt() {
+        assert!(matches!(stmt("break;").kind, StmtKind::Break));
+    }
+
+    #[test]
+    fn continue_stmt() {
+        assert!(matches!(stmt("continue;").kind, StmtKind::Continue));
+    }
+
+    #[test]
+    fn block_stmt() {
+        assert!(matches!(stmt("{ let x = 1; }").kind, StmtKind::Block(_)));
+    }
+
+    #[test]
+    fn expr_stmt_call() {
+        let s = stmt("foo(1, 2);");
+        match &s.kind {
+            StmtKind::Expr(e) => assert!(matches!(e.kind, ExprKind::Call { .. })),
+            _ => panic!("expected expr stmt"),
+        }
+    }
+
+    #[test]
+    fn nested_blocks() {
+        // Blocks containing other blocks parse without panic
+        let s = stmt("{ { let x = 1; } }");
+        match &s.kind {
+            StmtKind::Block(outer) => {
+                assert_eq!(outer.stmts.len(), 1);
+                assert!(matches!(outer.stmts[0].kind, StmtKind::Block(_)));
+            }
+            _ => panic!("expected block"),
+        }
+    }
+
+    // ── Recovery tests ────────────────────────────────────────────────────────
+
+    #[test]
+    fn missing_semicolon_records_error() {
+        // `let x = 5` with no `;` should record an error for the missing token
+        let mut p = Parser::new("let x = 5");
+        p.parse_stmt();
+        assert!(!p.errors.is_empty(), "expected at least one error");
+    }
+
+    #[test]
+    fn stray_token_synchronizes_to_next_stmt() {
+        // `,` cannot start a statement; parser should synchronize so that
+        // the following `let` still parses correctly.
+        let mut p = Parser::new(", let x = 1;");
+        let s1 = p.parse_stmt();
+        let s2 = p.parse_stmt();
+        assert!(
+            matches!(s1.kind, StmtKind::Error),
+            "first stmt should be Error"
+        );
+        assert!(
+            matches!(s2.kind, StmtKind::Let { .. }),
+            "second stmt should be Let"
+        );
+    }
+
+    #[test]
+    fn stray_rcurly_is_consumed() {
+        // `synchronize()` alone stops *before* `}`; `parse_stmt` must consume
+        // the stray brace itself so that repeated calls make progress.
+        let mut p = Parser::new("} let x = 1;");
+        let s1 = p.parse_stmt();
+        let s2 = p.parse_stmt();
+        assert!(matches!(s1.kind, StmtKind::Error));
+        assert!(matches!(s2.kind, StmtKind::Let { .. }));
+    }
+
+    #[test]
+    fn missing_let_name_inserts_dummy() {
+        // `let = 5;` — missing name, but a dummy is inserted and parsing
+        // continues; we expect errors but no panic.
+        let mut p = Parser::new("let = 5;");
+        let s = p.parse_stmt();
+        assert!(!p.errors.is_empty());
+        // Even with the error, we should still get a Let node back.
+        assert!(matches!(s.kind, StmtKind::Let { .. }));
+    }
+
+    #[test]
+    fn if_condition_no_struct_literal() {
+        // `if Foo { }` — struct literals are disabled in the condition, so `Foo`
+        // alone is the condition and `{ }` is the `if` body, not an empty
+        // struct literal. The important thing is that this doesn't panic.
+        let mut p = Parser::new("if Foo { }");
+        let s = p.parse_stmt();
+        assert!(matches!(s.kind, StmtKind::If { .. }));
+    }
 }