Add expression AST and Pratt parser with REPL

- ast.rs: Expr/ExprKind with UnaryOp, BinaryOp, StructField
- parser.rs: Pratt expression parser with allow_struct_literals flag, error recovery via dummy tokens, and 19 unit tests
- main.rs: interactive expression REPL (prints parsed AST)
2026-03-10 17:40:52 +01:00
parent 4f80de51b2
commit becc7a2d34
3 changed files with 767 additions and 6 deletions
--- a/fluxc/src/ast.rs
+++ b/fluxc/src/ast.rs
@@ -0,0 +1,113 @@
 use crate::token::Span;
 // ── Operators ──────────────────────────────────────────────────────────────────
 /// Prefix (unary) operators that can appear in an expression.
 ///
 /// The trailing comment on each variant shows the operator's surface syntax.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum UnaryOp {
    Neg,    // `-`
    Not,    // `!`
    BitNot, // `~`
    Deref,  // `*`
    AddrOf, // `&`
 }
 /// Infix (binary) operators that can appear in an expression.
 ///
 /// Variants are grouped by category; the trailing comment on each variant
 /// shows the operator's surface syntax. Note that the logical operators are
 /// spelled as the words `or` / `and`.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum BinaryOp {
    // Logical
    Or,  // `or`
    And, // `and`
    // Bitwise
    BitOr,  // `|`
    BitXor, // `^`
    BitAnd, // `&`
    // Comparison
    Eq, // `==`
    Ne, // `!=`
    Lt, // `<`
    Gt, // `>`
    Le, // `<=`
    Ge, // `>=`
    // Arithmetic
    Add, // `+`
    Sub, // `-`
    Mul, // `*`
    Div, // `/`
    Rem, // `%`
 }
 // ── Struct literal field ───────────────────────────────────────────────────────
 /// A single `name: value` entry inside a struct literal.
 #[derive(Debug, Clone)]
 pub struct StructField {
    /// Field name as written in the source.
    pub name: String,
    /// Span of the field-name token only (not the whole `name: value` entry).
    pub name_span: Span,
    /// Expression supplying the field's value.
    pub value: Expr,
 }
 // ── Expression ────────────────────────────────────────────────────────────────
 /// An expression node: what kind of expression it is plus the span it is
 /// associated with.
 #[derive(Debug, Clone)]
 pub struct Expr {
    /// The shape of the expression and its children.
    pub kind: ExprKind,
    /// Span attached to this node by whoever constructed it.
    pub span: Span,
 }
 impl Expr {
    pub fn new(kind: ExprKind, span: Span) -> Self {
        Self { kind, span }
    }
 }
 /// Every form an expression can take.
 ///
 /// Literal variants carry text as a `String`; this enum does not itself
 /// interpret or validate that text.
 #[derive(Debug, Clone)]
 pub enum ExprKind {
    // Literals
    /// Integer literal text.
    IntLit(String),
    /// Floating-point literal text.
    FloatLit(String),
    /// String literal text.
    StringLit(String),
    /// Character literal text.
    CharLit(String),
    /// Boolean literal.
    Bool(bool),
    // Identifier
    /// A bare name.
    Ident(String),
    // Struct literal: `Foo { x: 1, y: 2 }`
    /// Struct literal: the struct name, the span of that name, and the
    /// fields in the order they were written.
    StructLit {
        name: String,
        name_span: Span,
        fields: Vec<StructField>,
    },
    // Operators
    /// Prefix operator applied to `expr`; `op_span` locates the operator.
    Unary {
        op: UnaryOp,
        op_span: Span,
        expr: Box<Expr>,
    },
    /// Infix operator applied to `lhs` and `rhs`; `op_span` locates the
    /// operator.
    Binary {
        op: BinaryOp,
        op_span: Span,
        lhs: Box<Expr>,
        rhs: Box<Expr>,
    },
    // Postfix
    /// Field access `expr.field`; `field_span` locates the field name.
    Field {
        expr: Box<Expr>,
        field: String,
        field_span: Span,
    },
    /// Indexing `expr[index]`.
    Index {
        expr: Box<Expr>,
        index: Box<Expr>,
    },
    /// Call `callee(args…)`.
    Call {
        callee: Box<Expr>,
        args: Vec<Expr>,
    },
    // Parenthesised expression
    /// `( inner )` — kept as its own node rather than being replaced by the
    /// inner expression.
    Group(Box<Expr>),
    // Placeholder for parse errors — allows parsing to continue
    Error,
 }
--- a/fluxc/src/main.rs
+++ b/fluxc/src/main.rs
@@ -1,15 +1,46 @@
-use std::{env::args, fs};
+use std::io::{self, BufRead, Write};
-use crate::lexer::Lexer;
+use crate::parser::Parser;
 pub mod ast;
 pub mod lexer;
 pub mod parser;
 pub mod token;
 /// Runs an interactive read–parse–print loop for single expressions.
 ///
 /// Each non-blank input line is parsed as one expression with struct
 /// literals enabled. Parse errors go to stderr; the pretty-printed AST is
 /// written to stdout only when no errors were recorded.
 fn main() {
-    let path = args().nth(1).expect("usage: fluxc <file>");
+    let stdin = io::stdin();
-    let content = fs::read_to_string(&path).expect("error: failed to read file");
+    let stdout = io::stdout();
-    for token in Lexer::new(&content) {
+    println!("flux expression REPL (ctrl+d to exit)");
-        println!("{token:?}");
+
    loop {
        // `print!` does not emit a newline, so flush explicitly to make the
        // prompt visible before blocking on input.
        print!("> ");
        stdout.lock().flush().unwrap();
        // A fresh buffer per iteration: `read_line` appends to its argument.
        let mut line = String::new();
        match stdin.lock().read_line(&mut line) {
            Ok(0) => break, // EOF
            Ok(_) => {}
            Err(e) => {
                // A read failure ends the session rather than retrying.
                eprintln!("error: {e}");
                break;
            }
        }
        // Drop the trailing newline and surrounding whitespace; blank lines
        // just re-prompt.
        let src = line.trim();
        if src.is_empty() {
            continue;
        }
        let mut parser = Parser::new(src);
        let expr = parser.parse_expr(true);
        // NOTE(review): only `parser.errors` is inspected here; nothing in
        // this function checks whether input remains after the expression.
        for err in &parser.errors {
            eprintln!("parse error: {err}");
        }
        // The AST is shown only for error-free parses.
        if parser.errors.is_empty() {
            println!("{expr:#?}");
        }
    }
 }
--- a/fluxc/src/parser.rs
+++ b/fluxc/src/parser.rs
@@ -0,0 +1,617 @@
 use std::fmt;
 use crate::{
    ast::{BinaryOp, Expr, ExprKind, StructField, UnaryOp},
    lexer::Lexer,
    token::{Span, Token, TokenKind},
 };
 // ── Parse error ───────────────────────────────────────────────────────────────
 /// A diagnostic recorded by the parser; parsing continues after it is
 /// recorded.
 #[derive(Debug, Clone)]
 pub struct ParseError {
    /// Location the error is reported at.
    pub span: Span,
    /// Human-readable description of what went wrong.
    pub message: String,
 }
 impl fmt::Display for ParseError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "error at {}: {}", self.span, self.message)
    }
 }
 // ── Binding powers ─────────────────────────────────────────────────────────────
 //
 // Binding powers for infix and postfix operators, as `(left_bp, right_bp)`.
 // Returns `None` for any token that cannot continue an expression.
 //
 // Every entry has left_bp < right_bp, which makes the operator
 // left-associative. A right-associative operator would use
 // left_bp > right_bp, but this table has none.
 //
 // NOTE: the comparison operators (==, !=, <, >, <=, >=) are not listed in the
 // GRAMMAR.ebnf precedence table but appear in examples; they are slotted in
 // at 55, between bitwise-AND (50) and the additive operators (60).
 fn infix_bp(kind: TokenKind) -> Option<(u8, u8)> {
    match kind {
        // Logical
        TokenKind::Or => Some((10, 11)),
        TokenKind::And => Some((20, 21)),
        // Bitwise
        TokenKind::Pipe => Some((30, 31)),
        TokenKind::Caret => Some((40, 41)),
        TokenKind::Amp => Some((50, 51)),
        // Comparison
        TokenKind::EqEq
        | TokenKind::BangEq
        | TokenKind::Lt
        | TokenKind::Gt
        | TokenKind::LtEq
        | TokenKind::GtEq => Some((55, 56)),
        // Arithmetic
        TokenKind::Plus | TokenKind::Minus => Some((60, 61)),
        TokenKind::Star | TokenKind::Slash | TokenKind::Percent => Some((70, 71)),
        // Postfix `.`, `[`, `(`: parse_led handles these itself and ignores
        // the right binding power; they are listed so that callers see them
        // as expression continuations that bind tighter than any prefix
        // operator.
        TokenKind::Dot | TokenKind::LBracket | TokenKind::LParen => Some((90, 91)),
        _ => None,
    }
 }
 // Right binding power of a prefix operator, or `None` when `kind` is not a
 // prefix operator. All prefix operators share one level (80), which sits
 // above every binary operator and below the postfix operators.
 fn prefix_bp(kind: TokenKind) -> Option<u8> {
    const PREFIX_BP: u8 = 80;
    let is_prefix_op = matches!(
        kind,
        TokenKind::Bang
            | TokenKind::Tilde
            | TokenKind::Minus
            | TokenKind::Star
            | TokenKind::Amp
    );
    if is_prefix_op {
        Some(PREFIX_BP)
    } else {
        None
    }
 }
 // Maps a prefix-operator token kind to its AST operator.
 //
 // Panics (via `unreachable!`) for any other token kind; callers are expected
 // to have checked `prefix_bp` first.
 fn token_to_unary_op(kind: TokenKind) -> UnaryOp {
    match kind {
        TokenKind::Amp => UnaryOp::AddrOf,
        TokenKind::Star => UnaryOp::Deref,
        TokenKind::Tilde => UnaryOp::BitNot,
        TokenKind::Bang => UnaryOp::Not,
        TokenKind::Minus => UnaryOp::Neg,
        other => unreachable!("not a unary op: {:?}", other),
    }
 }
 // Maps a binary-operator token kind to its AST operator.
 //
 // Panics (via `unreachable!`) for any other token kind, including the
 // postfix tokens `.`, `[` and `(` that `infix_bp` also recognises.
 fn token_to_binary_op(kind: TokenKind) -> BinaryOp {
    match kind {
        // Arithmetic
        TokenKind::Plus => BinaryOp::Add,
        TokenKind::Minus => BinaryOp::Sub,
        TokenKind::Star => BinaryOp::Mul,
        TokenKind::Slash => BinaryOp::Div,
        TokenKind::Percent => BinaryOp::Rem,
        // Comparison
        TokenKind::EqEq => BinaryOp::Eq,
        TokenKind::BangEq => BinaryOp::Ne,
        TokenKind::Lt => BinaryOp::Lt,
        TokenKind::Gt => BinaryOp::Gt,
        TokenKind::LtEq => BinaryOp::Le,
        TokenKind::GtEq => BinaryOp::Ge,
        // Bitwise
        TokenKind::Pipe => BinaryOp::BitOr,
        TokenKind::Caret => BinaryOp::BitXor,
        TokenKind::Amp => BinaryOp::BitAnd,
        // Logical
        TokenKind::Or => BinaryOp::Or,
        TokenKind::And => BinaryOp::And,
        other => unreachable!("not a binary op: {:?}", other),
    }
 }
 // ── Parser ─────────────────────────────────────────────────────────────────────
 /// Expression parser over a token vector lexed from a source string.
 ///
 /// Errors are collected in `errors` instead of being returned from the
 /// parsing methods.
 pub struct Parser<'src> {
    /// All tokens of the input, produced by `Lexer::tokenize` in `new`.
    tokens: Vec<Token<'src>>,
    /// Index into `tokens` of the token currently being looked at.
    pos: usize,
    /// Errors recorded so far, in the order they were encountered.
    pub errors: Vec<ParseError>,
 }
 impl<'src> Parser<'src> {
    pub fn new(src: &'src str) -> Self {
        let tokens = Lexer::new(src).tokenize();
        Self {
            tokens,
            pos: 0,
            errors: Vec::new(),
        }
    }
    // ── Token access ──────────────────────────────────────────────────────────
    /// Returns a copy of the token at the cursor without consuming it.
    ///
    /// Indexes `tokens` directly, so this panics if `pos` is out of bounds.
    /// NOTE(review): presumably `Lexer::tokenize` always terminates the
    /// stream with an `Eof` token, which `advance` never moves past — confirm
    /// against the lexer.
    fn current(&self) -> Token<'src> {
        self.tokens[self.pos]
    }
    /// Returns the current token and moves the cursor to the next one.
    ///
    /// `Eof` is sticky: once the cursor sits on it, the cursor stays there
    /// and every further call returns that same `Eof` token.
    fn advance(&mut self) -> Token<'src> {
        let consumed = self.current();
        match consumed.kind {
            TokenKind::Eof => {}
            _ => self.pos += 1,
        }
        consumed
    }
    /// Consumes and returns the current token when it has the given `kind`.
    ///
    /// On a mismatch nothing is consumed: an "expected …, found …" error is
    /// recorded and a synthetic token of the requested kind is returned. The
    /// synthetic token has empty text and a zero-width span at the start of
    /// the offending token, so callers can carry on as if it had been there.
    fn expect(&mut self, kind: TokenKind) -> Token<'src> {
        let found = self.current();
        if found.kind == kind {
            return self.advance();
        }

        let at = found.span.start;
        let span = Span::new(at, at);
        let message = format!("expected {}, found {}", kind, found.kind);
        self.errors.push(ParseError { span, message });
        Token::new(kind, span, "")
    }
    // ── Public API ────────────────────────────────────────────────────────────
    /// Parse a single expression starting at the current token.
    ///
    /// Always returns an `Expr`; problems are reported through
    /// `self.errors`, and an unparseable operand is represented by
    /// `ExprKind::Error`. Parsing stops at the first token that cannot
    /// continue the expression, and that token is left unconsumed.
    ///
    /// `allow_struct_literals` controls whether a bare `Ident { … }` is
    /// parsed as a struct literal. Pass `false` in `if`/`while` conditions
    /// so that `{` is not consumed as a struct body.
    pub fn parse_expr(&mut self, allow_struct_literals: bool) -> Expr {
        // Binding power 0 accepts every operator.
        self.pratt(0, allow_struct_literals)
    }
    // ── Pratt core ────────────────────────────────────────────────────────────
    /// Core Pratt loop: parses one operand, then keeps folding in infix and
    /// postfix operators whose left binding power is at least `min_bp`.
    ///
    /// * `min_bp` — minimum left binding power an operator needs in order to
    ///   be absorbed by this call; weaker operators are left for an
    ///   enclosing call.
    /// * `allow_struct_lit` — whether `Ident {` starts a struct literal.
    ///
    /// Returns the parsed expression; errors are recorded in `self.errors`.
    fn pratt(&mut self, min_bp: u8, allow_struct_lit: bool) -> Expr {
        let mut lhs = self.parse_nud(allow_struct_lit);
        loop {
            let op_tok = self.current();
            // Struct literal: `Ident {` — only when the flag is set, and only
            // when the lhs is a bare identifier. `lhs` can only be a bare
            // identifier straight after `parse_nud` (every `parse_led` result
            // is a compound node), so this behaves like a primary expression
            // and is valid at any binding power: `a + Foo { x: 1 }` and
            // `-Foo { x: 1 }` are parsed, not cut short at the `{`.
            if allow_struct_lit
                && op_tok.kind == TokenKind::LCurly
                && matches!(lhs.kind, ExprKind::Ident(_))
            {
                lhs = self.parse_struct_lit(lhs);
                // Keep looping so postfix/infix operators can follow the
                // literal, e.g. `Foo { x: 1 }.x`.
                continue;
            }
            let (l_bp, r_bp) = match infix_bp(op_tok.kind) {
                Some(bp) => bp,
                // Not an operator: the expression ends here.
                None => break,
            };
            // The operator binds too loosely for this level; an enclosing
            // call will pick it up.
            if l_bp < min_bp {
                break;
            }
            lhs = self.parse_led(lhs, op_tok, r_bp, allow_struct_lit);
        }
        lhs
    }
    // ── Null denotation (prefix / primary) ───────────────────────────────────
    fn parse_nud(&mut self, allow_struct_lit: bool) -> Expr {
        let tok = self.advance();
        match tok.kind {
            // Literals
            TokenKind::IntLit => Expr::new(ExprKind::IntLit(tok.text.to_owned()), tok.span),
            TokenKind::FloatLit => Expr::new(ExprKind::FloatLit(tok.text.to_owned()), tok.span),
            TokenKind::StringLit => Expr::new(ExprKind::StringLit(tok.text.to_owned()), tok.span),
            TokenKind::CharLit => Expr::new(ExprKind::CharLit(tok.text.to_owned()), tok.span),
            TokenKind::True => Expr::new(ExprKind::Bool(true), tok.span),
            TokenKind::False => Expr::new(ExprKind::Bool(false), tok.span),
            // Identifier
            TokenKind::Ident => Expr::new(ExprKind::Ident(tok.text.to_owned()), tok.span),
            // Prefix unary
            kind if prefix_bp(kind).is_some() => {
                let r_bp = prefix_bp(kind).unwrap();
                let op = token_to_unary_op(kind);
                let operand = self.pratt(r_bp, allow_struct_lit);
                let span = tok.span.cover(operand.span);
                Expr::new(
                    ExprKind::Unary {
                        op,
                        op_span: tok.span,
                        expr: Box::new(operand),
                    },
                    span,
                )
            }
            // Grouped expression
            TokenKind::LParen => {
                // Inside parentheses struct literals are always allowed.
                let inner = self.pratt(0, true);
                let close = self.expect(TokenKind::RParen);
                let span = tok.span.cover(close.span);
                Expr::new(ExprKind::Group(Box::new(inner)), span)
            }
            // Error recovery
            _ => {
                self.errors.push(ParseError {
                    span: tok.span,
                    message: format!("unexpected token {} in expression", tok.kind),
                });
                Expr::new(ExprKind::Error, tok.span)
            }
        }
    }
    // ── Left denotation (infix / postfix) ────────────────────────────────────
    /// Extends `lhs` with the infix or postfix operator `op_tok`.
    ///
    /// `op_tok` must be the current token; it is consumed here. `r_bp` is the
    /// operator's right binding power and is only used for binary operators.
    /// The resulting node spans from the start of `lhs` to the end of the
    /// last piece parsed.
    fn parse_led(
        &mut self,
        lhs: Expr,
        op_tok: Token<'src>,
        r_bp: u8,
        allow_struct_lit: bool,
    ) -> Expr {
        // Step over the operator token itself.
        self.advance();

        // Remember where the node starts before `lhs` is moved into a box.
        let start = lhs.span;
        let (kind, end) = match op_tok.kind {
            // `expr.field`
            TokenKind::Dot => {
                let field_tok = self.expect(TokenKind::Ident);
                let node = ExprKind::Field {
                    expr: Box::new(lhs),
                    field: field_tok.text.to_owned(),
                    field_span: field_tok.span,
                };
                (node, field_tok.span)
            }
            // `expr[index]`
            TokenKind::LBracket => {
                // Inside brackets struct literals are always allowed.
                let index = self.pratt(0, true);
                let close = self.expect(TokenKind::RBracket);
                let node = ExprKind::Index {
                    expr: Box::new(lhs),
                    index: Box::new(index),
                };
                (node, close.span)
            }
            // `expr(args…)`
            TokenKind::LParen => {
                let (args, close_span) = self.parse_arg_list();
                let node = ExprKind::Call {
                    callee: Box::new(lhs),
                    args,
                };
                (node, close_span)
            }
            // Anything else is a binary operator.
            other => {
                let op = token_to_binary_op(other);
                let rhs = self.pratt(r_bp, allow_struct_lit);
                let rhs_span = rhs.span;
                let node = ExprKind::Binary {
                    op,
                    op_span: op_tok.span,
                    lhs: Box::new(lhs),
                    rhs: Box::new(rhs),
                };
                (node, rhs_span)
            }
        };
        Expr::new(kind, start.cover(end))
    }
    // ── Struct literal ────────────────────────────────────────────────────────
    /// Called after we have already parsed the leading `Ident` as `lhs` and
    /// the current token is `{`.
    fn parse_struct_lit(&mut self, name_expr: Expr) -> Expr {
        let (name, name_span) = match name_expr.kind {
            ExprKind::Ident(ref s) => (s.clone(), name_expr.span),
            _ => unreachable!(),
        };
        self.advance(); // consume `{`
        let fields = self.parse_struct_field_list();
        let close = self.expect(TokenKind::RCurly);
        let span = name_span.cover(close.span);
        Expr::new(
            ExprKind::StructLit {
                name,
                name_span,
                fields,
            },
            span,
        )
    }
    /// Parses comma-separated struct fields up to, but not including, the
    /// closing `}`. A trailing comma is accepted. Parsing also stops at end
    /// of input or after a field that is not followed by a comma.
    fn parse_struct_field_list(&mut self) -> Vec<StructField> {
        let mut fields = Vec::new();
        while !matches!(self.current().kind, TokenKind::RCurly | TokenKind::Eof) {
            fields.push(self.parse_struct_field());
            // No separator means the list is over; the caller checks for `}`.
            if self.current().kind != TokenKind::Comma {
                break;
            }
            self.advance();
        }
        fields
    }
    /// Parses one `name: value` struct field.
    ///
    /// A missing name or colon is reported through `expect` and parsing
    /// continues with the value expression.
    fn parse_struct_field(&mut self) -> StructField {
        let ident = self.expect(TokenKind::Ident);
        let _colon = self.expect(TokenKind::Colon);
        StructField {
            name: ident.text.to_owned(),
            name_span: ident.span,
            // Struct literals allowed inside field values.
            value: self.pratt(0, true),
        }
    }
    // ── Argument list ─────────────────────────────────────────────────────────
    /// Parses call arguments `arg, arg, …` and the closing `)`.
    ///
    /// The opening `(` has already been consumed by `parse_led`. A trailing
    /// comma is accepted. Returns the arguments together with the span of
    /// the closing parenthesis (a zero-width span if it was missing).
    fn parse_arg_list(&mut self) -> (Vec<Expr>, Span) {
        let mut args = Vec::new();
        while !matches!(self.current().kind, TokenKind::RParen | TokenKind::Eof) {
            // Struct literals allowed inside argument lists.
            args.push(self.pratt(0, true));
            // No separator means the list is over.
            if self.current().kind != TokenKind::Comma {
                break;
            }
            self.advance();
        }
        let close_span = self.expect(TokenKind::RParen).span;
        (args, close_span)
    }
 }
 // ── Tests ──────────────────────────────────────────────────────────────────────
 #[cfg(test)]
 mod tests {
    use super::*;

    /// Parses `src` as one expression and fails the test if the parser
    /// recorded any error, so a test cannot pass on an AST that was only
    /// produced through error recovery.
    fn parse_clean(src: &str, allow_struct_literals: bool) -> Expr {
        let mut parser = Parser::new(src);
        let expr = parser.parse_expr(allow_struct_literals);
        assert!(
            parser.errors.is_empty(),
            "unexpected parse errors for {:?}: {:?}",
            src,
            parser.errors
        );
        expr
    }

    /// Parses with struct literals enabled.
    fn parse(src: &str) -> Expr {
        parse_clean(src, true)
    }

    /// Parses with struct literals disabled (as in `if`/`while` conditions).
    fn parse_no_struct(src: &str) -> Expr {
        parse_clean(src, false)
    }

    #[test]
    fn int_literal() {
        let expr = parse("42");
        assert!(matches!(expr.kind, ExprKind::IntLit(ref s) if s == "42"));
    }

    #[test]
    fn float_literal() {
        let expr = parse("3.14");
        assert!(matches!(expr.kind, ExprKind::FloatLit(ref s) if s == "3.14"));
    }

    #[test]
    fn bool_literals() {
        assert!(matches!(parse("true").kind, ExprKind::Bool(true)));
        assert!(matches!(parse("false").kind, ExprKind::Bool(false)));
    }

    #[test]
    fn ident() {
        let expr = parse("foo");
        assert!(matches!(expr.kind, ExprKind::Ident(ref s) if s == "foo"));
    }

    #[test]
    fn unary_neg() {
        let expr = parse("-42");
        assert!(matches!(
            expr.kind,
            ExprKind::Unary {
                op: UnaryOp::Neg,
                ..
            }
        ));
    }

    #[test]
    fn unary_not() {
        let expr = parse("!x");
        assert!(matches!(
            expr.kind,
            ExprKind::Unary {
                op: UnaryOp::Not,
                ..
            }
        ));
    }

    #[test]
    fn binary_add() {
        let expr = parse("a + b");
        assert!(matches!(
            expr.kind,
            ExprKind::Binary {
                op: BinaryOp::Add,
                ..
            }
        ));
    }

    #[test]
    fn binary_precedence() {
        // `a + b * c` should parse as `a + (b * c)`
        let expr = parse("a + b * c");
        match &expr.kind {
            ExprKind::Binary {
                op: BinaryOp::Add,
                lhs,
                rhs,
                ..
            } => {
                assert!(matches!(lhs.kind, ExprKind::Ident(ref s) if s == "a"));
                assert!(matches!(
                    rhs.kind,
                    ExprKind::Binary {
                        op: BinaryOp::Mul,
                        ..
                    }
                ));
            }
            _ => panic!("expected binary add, got {:?}", expr.kind),
        }
    }

    #[test]
    fn comparison() {
        let expr = parse("a == b");
        assert!(matches!(
            expr.kind,
            ExprKind::Binary {
                op: BinaryOp::Eq,
                ..
            }
        ));
    }

    #[test]
    fn logical_and_or() {
        // `a or b and c` → `a or (b and c)` (and binds tighter)
        let expr = parse("a or b and c");
        match &expr.kind {
            ExprKind::Binary {
                op: BinaryOp::Or,
                rhs,
                ..
            } => {
                assert!(matches!(
                    rhs.kind,
                    ExprKind::Binary {
                        op: BinaryOp::And,
                        ..
                    }
                ));
            }
            _ => panic!("expected or at top level"),
        }
    }

    #[test]
    fn grouped_expr() {
        let expr = parse("(a + b)");
        assert!(matches!(expr.kind, ExprKind::Group(_)));
    }

    #[test]
    fn field_access() {
        let expr = parse("foo.bar");
        assert!(matches!(expr.kind, ExprKind::Field { ref field, .. } if field == "bar"));
    }

    #[test]
    fn index_expr() {
        let expr = parse("arr[0]");
        assert!(matches!(expr.kind, ExprKind::Index { .. }));
    }

    #[test]
    fn call_no_args() {
        let expr = parse("foo()");
        match &expr.kind {
            ExprKind::Call { args, .. } => assert!(args.is_empty()),
            _ => panic!("expected call"),
        }
    }

    #[test]
    fn call_with_args() {
        let expr = parse("foo(1, 2, 3)");
        match &expr.kind {
            ExprKind::Call { args, .. } => assert_eq!(args.len(), 3),
            _ => panic!("expected call"),
        }
    }

    #[test]
    fn struct_literal() {
        let expr = parse("Foo { x: 1, y: 2 }");
        match &expr.kind {
            ExprKind::StructLit { name, fields, .. } => {
                assert_eq!(name, "Foo");
                assert_eq!(fields.len(), 2);
            }
            _ => panic!("expected struct literal, got {:?}", expr.kind),
        }
    }

    #[test]
    fn struct_literal_disabled() {
        // With allow_struct_literals=false, `Foo { ... }` should NOT be a
        // struct literal — the Ident is parsed alone and `{` is left unconsumed.
        let expr = parse_no_struct("Foo { x: 1 }");
        assert!(matches!(expr.kind, ExprKind::Ident(ref s) if s == "Foo"));
    }

    #[test]
    fn chained_field_access() {
        // `a.b.c` nests to the left: `(a.b).c`
        let expr = parse("a.b.c");
        match &expr.kind {
            ExprKind::Field {
                expr: inner, field, ..
            } => {
                assert_eq!(field, "c");
                assert!(matches!(inner.kind, ExprKind::Field { ref field, .. } if field == "b"));
            }
            _ => panic!("expected field access"),
        }
    }

    #[test]
    fn deref_and_addrof() {
        assert!(matches!(
            parse("*p").kind,
            ExprKind::Unary {
                op: UnaryOp::Deref,
                ..
            }
        ));
        assert!(matches!(
            parse("&x").kind,
            ExprKind::Unary {
                op: UnaryOp::AddrOf,
                ..
            }
        ));
    }
 }