From 546dc119d0cd4dbc9a4683e7ce491a1d376a73ee Mon Sep 17 00:00:00 2001
From: Jooris Hadeler
Date: Tue, 10 Mar 2026 18:03:56 +0100
Subject: [PATCH] Add function/struct definition parsing and program entry point

- ast.rs: Param, FieldDef, FuncDef, StructDef, TopLevelDef, TopLevelDefKind, Program
- parser.rs: parse_program, parse_top_level_def, parse_func_def, parse_struct_def with param/field list helpers; synchronize_top_level for recovery; 14 new tests (76 total)
- main.rs: parse source file as a Program and print the AST
---
 fluxc/src/ast.rs    |  61 +++++++++
 fluxc/src/main.rs   |  45 ++-----
 fluxc/src/parser.rs | 322 +++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 393 insertions(+), 35 deletions(-)

diff --git a/fluxc/src/ast.rs b/fluxc/src/ast.rs
index d5a7f7a..782f877 100644
--- a/fluxc/src/ast.rs
+++ b/fluxc/src/ast.rs
@@ -201,3 +201,64 @@ pub enum StmtKind {
     /// Error placeholder — emitted during recovery so the parent can continue.
     Error,
 }
+
+// ── Top-level definitions ──────────────────────────────────────────────────────
+
+/// A function parameter: `[mut] name : type`.
+#[derive(Debug, Clone)]
+pub struct Param {
+    pub mutable: bool,
+    pub name: String,
+    pub name_span: Span,
+    pub ty: Type,
+}
+
+/// A struct definition field: `name : type`.
+///
+/// Named `FieldDef` to distinguish from `StructField`, which is a
+/// field in a struct *literal expression*.
+#[derive(Debug, Clone)]
+pub struct FieldDef {
+    pub name: String,
+    pub name_span: Span,
+    pub ty: Type,
+}
+
+/// `fn name ( params ) [ -> type ] block`
+#[derive(Debug, Clone)]
+pub struct FuncDef {
+    pub name: String,
+    pub name_span: Span,
+    pub params: Vec<Param>,
+    pub ret_ty: Option<Type>,
+    pub body: Block,
+}
+
+/// `struct name { fields }`
+#[derive(Debug, Clone)]
+pub struct StructDef {
+    pub name: String,
+    pub name_span: Span,
+    pub fields: Vec<FieldDef>,
+}
+
+#[derive(Debug, Clone)]
+pub struct TopLevelDef {
+    pub kind: TopLevelDefKind,
+    pub span: Span,
+}
+
+#[derive(Debug, Clone)]
+pub enum TopLevelDefKind {
+    Func(FuncDef),
+    Struct(StructDef),
+    /// Error placeholder for recovery.
+    Error,
+}
+
+/// The root of the AST — a sequence of top-level definitions.
+#[derive(Debug, Clone)]
+pub struct Program {
+    pub defs: Vec<TopLevelDef>,
+    pub span: Span,
+}
diff --git a/fluxc/src/main.rs b/fluxc/src/main.rs
index cae111d..10cd541 100644
--- a/fluxc/src/main.rs
+++ b/fluxc/src/main.rs
@@ -1,4 +1,4 @@
-use std::io::{self, BufRead, Write};
+use std::{env::args, fs};
 
 use crate::parser::Parser;
 
@@ -8,39 +8,18 @@ pub mod parser;
 pub mod token;
 
 fn main() {
-    let stdin = io::stdin();
-    let stdout = io::stdout();
+    let path = args().nth(1).expect("usage: fluxc ");
+    let content = fs::read_to_string(&path).unwrap_or_else(|e| {
+        eprintln!("error: {e}");
+        std::process::exit(1)
+    });
 
-    println!("flux expression REPL (ctrl+d to exit)");
+    let mut parser = Parser::new(&content);
+    let program = parser.parse_program();
 
-    loop {
-        print!("> ");
-        stdout.lock().flush().unwrap();
-
-        let mut line = String::new();
-        match stdin.lock().read_line(&mut line) {
-            Ok(0) => break, // EOF
-            Ok(_) => {}
-            Err(e) => {
-                eprintln!("error: {e}");
-                break;
-            }
-        }
-
-        let src = line.trim();
-        if src.is_empty() {
-            continue;
-        }
-
-        let mut parser = Parser::new(src);
-        let node = parser.parse_stmt();
-
-        for err in &parser.errors {
-            eprintln!("parse error: {err}");
-        }
-
-        if parser.errors.is_empty() {
-            println!("{node:#?}");
-        }
+    for err in &parser.errors {
+        eprintln!("parse error: {err}");
     }
+
+    println!("{program:#?}");
 }
diff --git a/fluxc/src/parser.rs b/fluxc/src/parser.rs
index 5de1f44..ec91dc7 100644
--- a/fluxc/src/parser.rs
+++ b/fluxc/src/parser.rs
@@ -2,7 +2,8 @@ use std::fmt;
 
 use crate::{
     ast::{
-        BinaryOp, Block, ElseBranch, Expr, ExprKind, Stmt, StmtKind, StructField, Type, UnaryOp,
+        BinaryOp, Block, ElseBranch, Expr, ExprKind, FieldDef, FuncDef, Param, Program, Stmt,
+        StmtKind, StructDef, StructField, TopLevelDef, TopLevelDefKind, Type, UnaryOp,
     },
     lexer::Lexer,
     token::{Span, Token, TokenKind},
@@ -657,6 +658,158 @@ impl<'src> Parser<'src> {
 
     // ── Argument list ─────────────────────────────────────────────────────────
 
+    // ── Top-level definitions ─────────────────────────────────────────────────
+
+    /// Parse an entire source file as a `Program`.
+    pub fn parse_program(&mut self) -> Program {
+        let start = self.current().span;
+        let mut defs = Vec::new();
+        loop {
+            if self.current().kind == TokenKind::Eof {
+                break;
+            }
+            defs.push(self.parse_top_level_def());
+        }
+        let span = start.cover(self.current().span);
+        Program { defs, span }
+    }
+
+    /// Parse one top-level definition (`fn` or `struct`).
+    pub fn parse_top_level_def(&mut self) -> TopLevelDef {
+        let tok = self.current();
+        match tok.kind {
+            TokenKind::Fn => self.parse_func_def(),
+            TokenKind::Struct => self.parse_struct_def(),
+            _ => {
+                self.errors.push(ParseError {
+                    span: tok.span,
+                    message: format!("expected `fn` or `struct`, found {}", tok.kind),
+                });
+                self.synchronize_top_level();
+                TopLevelDef {
+                    kind: TopLevelDefKind::Error,
+                    span: tok.span,
+                }
+            }
+        }
+    }
+
+    /// Skip tokens until the next top-level boundary (`fn`, `struct`, or EOF).
+    fn synchronize_top_level(&mut self) {
+        loop {
+            match self.current().kind {
+                TokenKind::Eof | TokenKind::Fn | TokenKind::Struct => break,
+                _ => {
+                    self.advance();
+                }
+            }
+        }
+    }
+
+    fn parse_func_def(&mut self) -> TopLevelDef {
+        let kw = self.advance(); // consume `fn`
+        let name_tok = self.expect(TokenKind::Ident);
+        self.expect(TokenKind::LParen);
+        let params = self.parse_param_list();
+        self.expect(TokenKind::RParen);
+        let ret_ty = if self.current().kind == TokenKind::Arrow {
+            self.advance();
+            Some(self.parse_type())
+        } else {
+            None
+        };
+        let body = self.parse_block();
+        let span = kw.span.cover(body.span);
+        TopLevelDef {
+            kind: TopLevelDefKind::Func(FuncDef {
+                name: name_tok.text.to_owned(),
+                name_span: name_tok.span,
+                params,
+                ret_ty,
+                body,
+            }),
+            span,
+        }
+    }
+
+    fn parse_param_list(&mut self) -> Vec<Param> {
+        let mut params = Vec::new();
+        loop {
+            if matches!(self.current().kind, TokenKind::RParen | TokenKind::Eof) {
+                break;
+            }
+            params.push(self.parse_param());
+            if self.current().kind == TokenKind::Comma {
+                self.advance();
+            } else {
+                break;
+            }
+        }
+        params
+    }
+
+    fn parse_param(&mut self) -> Param {
+        let mutable = if self.current().kind == TokenKind::Mut {
+            self.advance();
+            true
+        } else {
+            false
+        };
+        let name_tok = self.expect(TokenKind::Ident);
+        self.expect(TokenKind::Colon);
+        let ty = self.parse_type();
+        Param {
+            mutable,
+            name: name_tok.text.to_owned(),
+            name_span: name_tok.span,
+            ty,
+        }
+    }
+
+    fn parse_struct_def(&mut self) -> TopLevelDef {
+        let kw = self.advance(); // consume `struct`
+        let name_tok = self.expect(TokenKind::Ident);
+        self.expect(TokenKind::LCurly);
+        let fields = self.parse_field_def_list();
+        let close = self.expect(TokenKind::RCurly);
+        let span = kw.span.cover(close.span);
+        TopLevelDef {
+            kind: TopLevelDefKind::Struct(StructDef {
+                name: name_tok.text.to_owned(),
+                name_span: name_tok.span,
+                fields,
+            }),
+            span,
+        }
+    }
+
+    fn parse_field_def_list(&mut self) -> Vec<FieldDef> {
+        let mut fields = Vec::new();
+        loop {
+            if matches!(self.current().kind, TokenKind::RCurly | TokenKind::Eof) {
+                break;
+            }
+            fields.push(self.parse_field_def());
+            if self.current().kind == TokenKind::Comma {
+                self.advance();
+            } else {
+                break;
+            }
+        }
+        fields
+    }
+
+    fn parse_field_def(&mut self) -> FieldDef {
+        let name_tok = self.expect(TokenKind::Ident);
+        self.expect(TokenKind::Colon);
+        let ty = self.parse_type();
+        FieldDef {
+            name: name_tok.text.to_owned(),
+            name_span: name_tok.span,
+            ty,
+        }
+    }
+
     /// Parse `arg, arg, …` up to `)`. The opening `(` has already been
     /// consumed by `parse_led`. Returns `(args, close_span)`.
     fn parse_arg_list(&mut self) -> (Vec<Expr>, Span) {
@@ -683,7 +836,7 @@ impl<'src> Parser<'src> {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::ast::{ElseBranch, ExprKind, StmtKind, Type};
+    use crate::ast::{ElseBranch, ExprKind, StmtKind, TopLevelDefKind, Type};
 
     // ── Expression test helpers ───────────────────────────────────────────────
 
@@ -1165,4 +1318,169 @@ mod tests {
         let s = p.parse_stmt();
         assert!(matches!(s.kind, StmtKind::If { .. }));
     }
+
+    // ── Function definition tests ─────────────────────────────────────────────
+
+    fn top(src: &str) -> TopLevelDef {
+        Parser::new(src).parse_top_level_def()
+    }
+
+    #[test]
+    fn func_def_empty() {
+        let d = top("fn foo() { }");
+        match &d.kind {
+            TopLevelDefKind::Func(f) => {
+                assert_eq!(f.name, "foo");
+                assert!(f.params.is_empty());
+                assert!(f.ret_ty.is_none());
+            }
+            _ => panic!("expected func def"),
+        }
+    }
+
+    #[test]
+    fn func_def_with_return_type() {
+        let d = top("fn answer() -> i32 { return 42; }");
+        match &d.kind {
+            TopLevelDefKind::Func(f) => {
+                assert!(matches!(f.ret_ty, Some(Type::I32)));
+            }
+            _ => panic!("expected func def"),
+        }
+    }
+
+    #[test]
+    fn func_def_params() {
+        let d = top("fn add(a: i32, b: i32) -> i32 { return a + b; }");
+        match &d.kind {
+            TopLevelDefKind::Func(f) => {
+                assert_eq!(f.params.len(), 2);
+                assert_eq!(f.params[0].name, "a");
+                assert!(!f.params[0].mutable);
+                assert!(matches!(f.params[0].ty, Type::I32));
+                assert_eq!(f.params[1].name, "b");
+            }
+            _ => panic!("expected func def"),
+        }
+    }
+
+    #[test]
+    fn func_def_mut_param() {
+        let d = top("fn inc(mut n: i32) -> i32 { n = n + 1; return n; }");
+        match &d.kind {
+            TopLevelDefKind::Func(f) => {
+                assert!(f.params[0].mutable);
+            }
+            _ => panic!("expected func def"),
+        }
+    }
+
+    #[test]
+    fn func_def_pointer_param() {
+        let d = top("fn foo(p: *i32) { }");
+        match &d.kind {
+            TopLevelDefKind::Func(f) => {
+                assert!(matches!(f.params[0].ty, Type::Pointer(_)));
+            }
+            _ => panic!("expected func def"),
+        }
+    }
+
+    // ── Struct definition tests ───────────────────────────────────────────────
+
+    #[test]
+    fn struct_def_empty() {
+        let d = top("struct Empty { }");
+        match &d.kind {
+            TopLevelDefKind::Struct(s) => {
+                assert_eq!(s.name, "Empty");
+                assert!(s.fields.is_empty());
+            }
+            _ => panic!("expected struct def"),
+        }
+    }
+
+    #[test]
+    fn struct_def_with_fields() {
+        let d = top("struct Point { x: f64, y: f64 }");
+        match &d.kind {
+            TopLevelDefKind::Struct(s) => {
+                assert_eq!(s.name, "Point");
+                assert_eq!(s.fields.len(), 2);
+                assert_eq!(s.fields[0].name, "x");
+                assert!(matches!(s.fields[0].ty, Type::F64));
+                assert_eq!(s.fields[1].name, "y");
+            }
+            _ => panic!("expected struct def"),
+        }
+    }
+
+    #[test]
+    fn struct_def_named_field_type() {
+        let d = top("struct Node { value: i32, next: *Node }");
+        match &d.kind {
+            TopLevelDefKind::Struct(s) => {
+                assert!(matches!(s.fields[1].ty, Type::Pointer(_)));
+            }
+            _ => panic!("expected struct def"),
+        }
+    }
+
+    // ── Program tests ─────────────────────────────────────────────────────────
+
+    fn program(src: &str) -> Program {
+        Parser::new(src).parse_program()
+    }
+
+    #[test]
+    fn program_empty() {
+        let p = program("");
+        assert!(p.defs.is_empty());
+    }
+
+    #[test]
+    fn program_single_func() {
+        let p = program("fn main() { }");
+        assert_eq!(p.defs.len(), 1);
+        assert!(matches!(p.defs[0].kind, TopLevelDefKind::Func(_)));
+    }
+
+    #[test]
+    fn program_struct_and_func() {
+        let src = "struct Point { x: f64, y: f64 } fn main() { }";
+        let p = program(src);
+        assert_eq!(p.defs.len(), 2);
+        assert!(matches!(p.defs[0].kind, TopLevelDefKind::Struct(_)));
+        assert!(matches!(p.defs[1].kind, TopLevelDefKind::Func(_)));
+    }
+
+    #[test]
+    fn program_multiple_funcs() {
+        let src = "fn foo() { } fn bar() -> i32 { return 1; } fn baz(x: bool) { }";
+        let p = program(src);
+        assert_eq!(p.defs.len(), 3);
+    }
+
+    // ── Top-level recovery tests ──────────────────────────────────────────────
+
+    #[test]
+    fn top_level_stray_token_synchronizes() {
+        // A stray token should produce Error and then the next definition
+        // should still parse correctly.
+        let mut p = Parser::new("42 fn foo() { }");
+        let d1 = p.parse_top_level_def();
+        let d2 = p.parse_top_level_def();
+        assert!(matches!(d1.kind, TopLevelDefKind::Error));
+        assert!(matches!(d2.kind, TopLevelDefKind::Func(_)));
+        assert!(!p.errors.is_empty());
+    }
+
+    #[test]
+    fn top_level_missing_func_name_inserts_dummy() {
+        // `fn () { }` — missing name; expect() inserts a dummy, no panic.
+        let mut p = Parser::new("fn () { }");
+        let d = p.parse_top_level_def();
+        assert!(!p.errors.is_empty());
+        assert!(matches!(d.kind, TopLevelDefKind::Func(_)));
+    }
 }