Add expression AST and Pratt parser with REPL

- ast.rs: Expr/ExprKind with UnaryOp, BinaryOp, StructField
- parser.rs: Pratt expression parser with allow_struct_literals flag,
  error recovery via dummy tokens, and 19 unit tests
- main.rs: interactive expression REPL (prints parsed AST)
This commit is contained in:
2026-03-10 17:40:52 +01:00
parent 4f80de51b2
commit becc7a2d34
3 changed files with 767 additions and 6 deletions

113
fluxc/src/ast.rs Normal file
View File

@@ -0,0 +1,113 @@
use crate::token::Span;
// ── Operators ──────────────────────────────────────────────────────────────────
/// Prefix (unary) operators that can appear in an [`ExprKind::Unary`] node.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UnaryOp {
/// Negation, written `-`.
Neg,
/// Not, written `!`.
Not,
/// Bitwise not, written `~`.
BitNot,
/// Dereference, written `*`.
Deref,
/// Address-of, written `&`.
AddrOf,
}
/// Infix (binary) operators that can appear in an [`ExprKind::Binary`] node.
///
/// The variants are grouped by family; the enum itself carries no precedence
/// information.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BinaryOp {
// Logical
/// Keyword operator `or`.
Or,
/// Keyword operator `and`.
And,
// Bitwise
/// Bitwise or, written `|`.
BitOr,
/// Bitwise xor, written `^`.
BitXor,
/// Bitwise and, written `&`.
BitAnd,
// Comparison
/// Equality, written `==`.
Eq,
/// Inequality, written `!=`.
Ne,
/// Less-than, written `<`.
Lt,
/// Greater-than, written `>`.
Gt,
/// Less-than-or-equal, written `<=`.
Le,
/// Greater-than-or-equal, written `>=`.
Ge,
// Arithmetic
/// Addition, written `+`.
Add,
/// Subtraction, written `-`.
Sub,
/// Multiplication, written `*`.
Mul,
/// Division, written `/`.
Div,
/// Remainder, written `%`.
Rem,
}
// ── Struct literal field ───────────────────────────────────────────────────────
/// A single `name: value` entry inside a struct literal such as
/// `Foo { x: 1, y: 2 }`.
#[derive(Debug, Clone)]
pub struct StructField {
/// Field name as written in the source.
pub name: String,
/// Source span of the field-name token.
pub name_span: Span,
/// Expression supplying the field's value.
pub value: Expr,
}
// ── Expression ────────────────────────────────────────────────────────────────
/// An expression node: the node's variant plus the source span it covers.
#[derive(Debug, Clone)]
pub struct Expr {
/// Which kind of expression this is, including any child nodes.
pub kind: ExprKind,
/// Source span covered by the whole expression.
pub span: Span,
}
impl Expr {
pub fn new(kind: ExprKind, span: Span) -> Self {
Self { kind, span }
}
}
/// The different shapes an expression can take.
///
/// Literal and identifier variants store the token text as an owned `String`;
/// compound variants box their children.
#[derive(Debug, Clone)]
pub enum ExprKind {
// Literals
/// Integer literal; holds the literal token's text.
IntLit(String),
/// Floating-point literal; holds the literal token's text.
FloatLit(String),
/// String literal; holds the literal token's text.
StringLit(String),
/// Character literal; holds the literal token's text.
CharLit(String),
/// Boolean literal `true` or `false`.
Bool(bool),
// Identifier
/// A bare identifier.
Ident(String),
// Struct literal: `Foo { x: 1, y: 2 }`
/// Struct literal.
StructLit {
/// Name written before the `{`.
name: String,
/// Source span of that name.
name_span: Span,
/// The `name: value` entries, in source order.
fields: Vec<StructField>,
},
// Operators
/// Prefix operator applied to a single operand.
Unary {
/// Which prefix operator.
op: UnaryOp,
/// Source span of the operator token alone.
op_span: Span,
/// The operand.
expr: Box<Expr>,
},
/// Infix operator applied to two operands.
Binary {
/// Which infix operator.
op: BinaryOp,
/// Source span of the operator token alone.
op_span: Span,
/// Left operand.
lhs: Box<Expr>,
/// Right operand.
rhs: Box<Expr>,
},
// Postfix
/// Field access: `expr.field`.
Field {
/// Expression whose field is accessed.
expr: Box<Expr>,
/// Name of the accessed field.
field: String,
/// Source span of the field-name token.
field_span: Span,
},
/// Indexing: `expr[index]`.
Index {
/// Expression being indexed.
expr: Box<Expr>,
/// Expression inside the brackets.
index: Box<Expr>,
},
/// Call: `callee(args…)`.
Call {
/// Expression being called.
callee: Box<Expr>,
/// Arguments, in source order.
args: Vec<Expr>,
},
// Parenthesised expression
/// `( expr )`; the parentheses are kept as an explicit node.
Group(Box<Expr>),
// Placeholder for parse errors — allows parsing to continue
/// Stand-in produced where no valid expression could be parsed.
Error,
}

View File

@@ -1,15 +1,46 @@
use std::{env::args, fs}; use std::io::{self, BufRead, Write};
use crate::lexer::Lexer; use crate::parser::Parser;
pub mod ast;
pub mod lexer; pub mod lexer;
pub mod parser;
pub mod token; pub mod token;
fn main() { fn main() {
let path = args().nth(1).expect("usage: fluxc <file>"); let stdin = io::stdin();
let content = fs::read_to_string(&path).expect("error: failed to read file"); let stdout = io::stdout();
for token in Lexer::new(&content) { println!("flux expression REPL (ctrl+d to exit)");
println!("{token:?}");
loop {
print!("> ");
stdout.lock().flush().unwrap();
let mut line = String::new();
match stdin.lock().read_line(&mut line) {
Ok(0) => break, // EOF
Ok(_) => {}
Err(e) => {
eprintln!("error: {e}");
break;
}
}
let src = line.trim();
if src.is_empty() {
continue;
}
let mut parser = Parser::new(src);
let expr = parser.parse_expr(true);
for err in &parser.errors {
eprintln!("parse error: {err}");
}
if parser.errors.is_empty() {
println!("{expr:#?}");
}
} }
} }

617
fluxc/src/parser.rs Normal file
View File

@@ -0,0 +1,617 @@
use std::fmt;
use crate::{
ast::{BinaryOp, Expr, ExprKind, StructField, UnaryOp},
lexer::Lexer,
token::{Span, Token, TokenKind},
};
// ── Parse error ───────────────────────────────────────────────────────────────
/// A diagnostic recorded by the parser; parsing continues after one is pushed.
#[derive(Debug, Clone)]
pub struct ParseError {
/// Source location the error refers to.
pub span: Span,
/// Human-readable description of what went wrong.
pub message: String,
}
impl fmt::Display for ParseError {
    /// Renders the diagnostic as `error at <span>: <message>`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { span, message } = self;
        write!(f, "error at {span}: {message}")
    }
}
// ── Binding powers ─────────────────────────────────────────────────────────────
//
// Binding powers `(left_bp, right_bp)` of the token when it is used as an
// infix or postfix operator; `None` when the token is neither.
//   left_bp < right_bp  → left-associative
//   left_bp > right_bp  → right-associative (none here)
//
// Every operator in the table is left-associative, so only the left power is
// spelled out and the right power is always `left + 1`.
//
// NOTE: comparison operators (==, !=, <, >, <=, >=) are not listed in the
// GRAMMAR.ebnf precedence table but appear in examples; placed between
// bitwise-AND (50) and additive (60) at 55.
fn infix_bp(kind: TokenKind) -> Option<(u8, u8)> {
    let left: u8 = match kind {
        TokenKind::Or => 10,
        TokenKind::And => 20,
        TokenKind::Pipe => 30,
        TokenKind::Caret => 40,
        TokenKind::Amp => 50,
        TokenKind::EqEq
        | TokenKind::BangEq
        | TokenKind::Lt
        | TokenKind::Gt
        | TokenKind::LtEq
        | TokenKind::GtEq => 55,
        TokenKind::Plus | TokenKind::Minus => 60,
        TokenKind::Star | TokenKind::Slash | TokenKind::Percent => 70,
        // Postfix forms `.`, `[`, `(` get their own handling in parse_led;
        // they are listed so the Pratt loop recognises them as continuations.
        TokenKind::Dot | TokenKind::LBracket | TokenKind::LParen => 90,
        _ => return None,
    };
    Some((left, left + 1))
}
// Right binding power of the token when it is used as a prefix operator, or
// `None` when it is not one.  All prefix operators share the same power (80).
fn prefix_bp(kind: TokenKind) -> Option<u8> {
    let is_prefix = matches!(
        kind,
        TokenKind::Bang | TokenKind::Tilde | TokenKind::Minus | TokenKind::Star | TokenKind::Amp
    );
    if is_prefix {
        Some(80)
    } else {
        None
    }
}
// Maps a prefix-operator token kind to its AST operator.
// Panics (via `unreachable!`) on any other token kind.
fn token_to_unary_op(kind: TokenKind) -> UnaryOp {
    match kind {
        TokenKind::Bang => UnaryOp::Not,
        TokenKind::Tilde => UnaryOp::BitNot,
        TokenKind::Minus => UnaryOp::Neg,
        TokenKind::Star => UnaryOp::Deref,
        TokenKind::Amp => UnaryOp::AddrOf,
        other => unreachable!("not a unary op: {:?}", other),
    }
}
// Maps an infix-operator token kind to its AST operator.
// Panics (via `unreachable!`) on any other token kind, including the postfix
// tokens `.`, `[` and `(`.
fn token_to_binary_op(kind: TokenKind) -> BinaryOp {
    match kind {
        // Arithmetic
        TokenKind::Plus => BinaryOp::Add,
        TokenKind::Minus => BinaryOp::Sub,
        TokenKind::Star => BinaryOp::Mul,
        TokenKind::Slash => BinaryOp::Div,
        TokenKind::Percent => BinaryOp::Rem,
        // Comparison
        TokenKind::EqEq => BinaryOp::Eq,
        TokenKind::BangEq => BinaryOp::Ne,
        TokenKind::Lt => BinaryOp::Lt,
        TokenKind::Gt => BinaryOp::Gt,
        TokenKind::LtEq => BinaryOp::Le,
        TokenKind::GtEq => BinaryOp::Ge,
        // Bitwise
        TokenKind::Pipe => BinaryOp::BitOr,
        TokenKind::Caret => BinaryOp::BitXor,
        TokenKind::Amp => BinaryOp::BitAnd,
        // Logical
        TokenKind::Or => BinaryOp::Or,
        TokenKind::And => BinaryOp::And,
        other => unreachable!("not a binary op: {:?}", other),
    }
}
// ── Parser ─────────────────────────────────────────────────────────────────────
/// Expression parser over a fully lexed token list.
///
/// Errors do not abort parsing: they are appended to `errors` and the parser
/// carries on with placeholder tokens or nodes.
pub struct Parser<'src> {
/// All tokens of the input, as returned by `Lexer::tokenize`.
tokens: Vec<Token<'src>>,
/// Index into `tokens` of the current (not yet consumed) token.
pos: usize,
/// Every parse error recorded so far, in the order encountered.
pub errors: Vec<ParseError>,
}
impl<'src> Parser<'src> {
pub fn new(src: &'src str) -> Self {
let tokens = Lexer::new(src).tokenize();
Self {
tokens,
pos: 0,
errors: Vec::new(),
}
}
// ── Token access ──────────────────────────────────────────────────────────
fn current(&self) -> Token<'src> {
self.tokens[self.pos]
}
/// Advance past the current token and return it.
fn advance(&mut self) -> Token<'src> {
let tok = self.current();
if tok.kind != TokenKind::Eof {
self.pos += 1;
}
tok
}
/// Consume the current token if it matches `kind`; otherwise record an
/// error and return a zero-width dummy token at the current position
/// so that parsing can continue.
fn expect(&mut self, kind: TokenKind) -> Token<'src> {
let tok = self.current();
if tok.kind == kind {
self.advance()
} else {
let span = Span::new(tok.span.start, tok.span.start);
self.errors.push(ParseError {
span,
message: format!("expected {}, found {}", kind, tok.kind),
});
Token::new(kind, span, "")
}
}
// ── Public API ────────────────────────────────────────────────────────────
/// Parse a single expression.
///
/// `allow_struct_literals` controls whether a bare `Ident { … }` is
/// parsed as a struct literal. Pass `false` in `if`/`while` conditions
/// so that `{` is not consumed as a struct body.
pub fn parse_expr(&mut self, allow_struct_literals: bool) -> Expr {
self.pratt(0, allow_struct_literals)
}
// ── Pratt core ────────────────────────────────────────────────────────────
fn pratt(&mut self, min_bp: u8, allow_struct_lit: bool) -> Expr {
let mut lhs = self.parse_nud(allow_struct_lit);
loop {
let op_tok = self.current();
// Struct literal: `Ident {` — only when the flag is set, and only
// when the lhs is a bare identifier.
if allow_struct_lit
&& op_tok.kind == TokenKind::LCurly
&& matches!(lhs.kind, ExprKind::Ident(_))
&& min_bp == 0
{
lhs = self.parse_struct_lit(lhs);
continue;
}
let (l_bp, r_bp) = match infix_bp(op_tok.kind) {
Some(bp) => bp,
None => break,
};
if l_bp < min_bp {
break;
}
lhs = self.parse_led(lhs, op_tok, r_bp, allow_struct_lit);
}
lhs
}
// ── Null denotation (prefix / primary) ───────────────────────────────────
fn parse_nud(&mut self, allow_struct_lit: bool) -> Expr {
let tok = self.advance();
match tok.kind {
// Literals
TokenKind::IntLit => Expr::new(ExprKind::IntLit(tok.text.to_owned()), tok.span),
TokenKind::FloatLit => Expr::new(ExprKind::FloatLit(tok.text.to_owned()), tok.span),
TokenKind::StringLit => Expr::new(ExprKind::StringLit(tok.text.to_owned()), tok.span),
TokenKind::CharLit => Expr::new(ExprKind::CharLit(tok.text.to_owned()), tok.span),
TokenKind::True => Expr::new(ExprKind::Bool(true), tok.span),
TokenKind::False => Expr::new(ExprKind::Bool(false), tok.span),
// Identifier
TokenKind::Ident => Expr::new(ExprKind::Ident(tok.text.to_owned()), tok.span),
// Prefix unary
kind if prefix_bp(kind).is_some() => {
let r_bp = prefix_bp(kind).unwrap();
let op = token_to_unary_op(kind);
let operand = self.pratt(r_bp, allow_struct_lit);
let span = tok.span.cover(operand.span);
Expr::new(
ExprKind::Unary {
op,
op_span: tok.span,
expr: Box::new(operand),
},
span,
)
}
// Grouped expression
TokenKind::LParen => {
// Inside parentheses struct literals are always allowed.
let inner = self.pratt(0, true);
let close = self.expect(TokenKind::RParen);
let span = tok.span.cover(close.span);
Expr::new(ExprKind::Group(Box::new(inner)), span)
}
// Error recovery
_ => {
self.errors.push(ParseError {
span: tok.span,
message: format!("unexpected token {} in expression", tok.kind),
});
Expr::new(ExprKind::Error, tok.span)
}
}
}
// ── Left denotation (infix / postfix) ────────────────────────────────────
fn parse_led(
&mut self,
lhs: Expr,
op_tok: Token<'src>,
r_bp: u8,
allow_struct_lit: bool,
) -> Expr {
// Consume the operator token.
self.advance();
match op_tok.kind {
// Field access: `expr.field`
TokenKind::Dot => {
let field_tok = self.expect(TokenKind::Ident);
let span = lhs.span.cover(field_tok.span);
Expr::new(
ExprKind::Field {
expr: Box::new(lhs),
field: field_tok.text.to_owned(),
field_span: field_tok.span,
},
span,
)
}
// Index: `expr[index]`
TokenKind::LBracket => {
// Inside brackets struct literals are always allowed.
let index = self.pratt(0, true);
let close = self.expect(TokenKind::RBracket);
let span = lhs.span.cover(close.span);
Expr::new(
ExprKind::Index {
expr: Box::new(lhs),
index: Box::new(index),
},
span,
)
}
// Call: `expr(args…)`
TokenKind::LParen => {
let (args, close_span) = self.parse_arg_list();
let span = lhs.span.cover(close_span);
Expr::new(
ExprKind::Call {
callee: Box::new(lhs),
args,
},
span,
)
}
// Binary operator
kind => {
let op = token_to_binary_op(kind);
let rhs = self.pratt(r_bp, allow_struct_lit);
let span = lhs.span.cover(rhs.span);
Expr::new(
ExprKind::Binary {
op,
op_span: op_tok.span,
lhs: Box::new(lhs),
rhs: Box::new(rhs),
},
span,
)
}
}
}
// ── Struct literal ────────────────────────────────────────────────────────
/// Called after we have already parsed the leading `Ident` as `lhs` and
/// the current token is `{`.
fn parse_struct_lit(&mut self, name_expr: Expr) -> Expr {
let (name, name_span) = match name_expr.kind {
ExprKind::Ident(ref s) => (s.clone(), name_expr.span),
_ => unreachable!(),
};
self.advance(); // consume `{`
let fields = self.parse_struct_field_list();
let close = self.expect(TokenKind::RCurly);
let span = name_span.cover(close.span);
Expr::new(
ExprKind::StructLit {
name,
name_span,
fields,
},
span,
)
}
fn parse_struct_field_list(&mut self) -> Vec<StructField> {
let mut fields = Vec::new();
loop {
if matches!(self.current().kind, TokenKind::RCurly | TokenKind::Eof) {
break;
}
fields.push(self.parse_struct_field());
if self.current().kind == TokenKind::Comma {
self.advance();
} else {
break;
}
}
fields
}
fn parse_struct_field(&mut self) -> StructField {
let name_tok = self.expect(TokenKind::Ident);
self.expect(TokenKind::Colon);
// Struct literals allowed inside field values.
let value = self.pratt(0, true);
StructField {
name: name_tok.text.to_owned(),
name_span: name_tok.span,
value,
}
}
// ── Argument list ─────────────────────────────────────────────────────────
/// Parse `arg, arg, …` up to `)`. The opening `(` has already been
/// consumed by `parse_led`. Returns `(args, close_span)`.
fn parse_arg_list(&mut self) -> (Vec<Expr>, Span) {
let mut args = Vec::new();
loop {
if matches!(self.current().kind, TokenKind::RParen | TokenKind::Eof) {
break;
}
// Struct literals allowed inside argument lists.
args.push(self.pratt(0, true));
if self.current().kind == TokenKind::Comma {
self.advance();
} else {
break;
}
}
let close = self.expect(TokenKind::RParen);
(args, close.span)
}
}
// ── Tests ──────────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `src` and fail the test if the parser recorded any error, so a
    /// test cannot pass on an AST that was only produced by error recovery.
    fn parse_checked(src: &str, allow_struct_literals: bool) -> Expr {
        let mut parser = Parser::new(src);
        let expr = parser.parse_expr(allow_struct_literals);
        assert!(
            parser.errors.is_empty(),
            "unexpected parse errors for {src:?}: {:?}",
            parser.errors
        );
        expr
    }

    /// Parse with struct literals enabled.
    fn parse(src: &str) -> Expr {
        parse_checked(src, true)
    }

    /// Parse with struct literals disabled (as in `if`/`while` conditions).
    fn parse_no_struct(src: &str) -> Expr {
        parse_checked(src, false)
    }

    #[test]
    fn int_literal() {
        let expr = parse("42");
        assert!(matches!(expr.kind, ExprKind::IntLit(ref s) if s == "42"));
    }

    #[test]
    fn float_literal() {
        let expr = parse("3.14");
        assert!(matches!(expr.kind, ExprKind::FloatLit(ref s) if s == "3.14"));
    }

    #[test]
    fn bool_literals() {
        assert!(matches!(parse("true").kind, ExprKind::Bool(true)));
        assert!(matches!(parse("false").kind, ExprKind::Bool(false)));
    }

    #[test]
    fn ident() {
        let expr = parse("foo");
        assert!(matches!(expr.kind, ExprKind::Ident(ref s) if s == "foo"));
    }

    #[test]
    fn unary_neg() {
        let expr = parse("-42");
        assert!(matches!(
            expr.kind,
            ExprKind::Unary {
                op: UnaryOp::Neg,
                ..
            }
        ));
    }

    #[test]
    fn unary_not() {
        let expr = parse("!x");
        assert!(matches!(
            expr.kind,
            ExprKind::Unary {
                op: UnaryOp::Not,
                ..
            }
        ));
    }

    #[test]
    fn binary_add() {
        let expr = parse("a + b");
        assert!(matches!(
            expr.kind,
            ExprKind::Binary {
                op: BinaryOp::Add,
                ..
            }
        ));
    }

    #[test]
    fn binary_precedence() {
        // `a + b * c` should parse as `a + (b * c)`
        let expr = parse("a + b * c");
        match &expr.kind {
            ExprKind::Binary {
                op: BinaryOp::Add,
                lhs,
                rhs,
                ..
            } => {
                assert!(matches!(lhs.kind, ExprKind::Ident(ref s) if s == "a"));
                assert!(matches!(
                    rhs.kind,
                    ExprKind::Binary {
                        op: BinaryOp::Mul,
                        ..
                    }
                ));
            }
            _ => panic!("expected binary add, got {:?}", expr.kind),
        }
    }

    #[test]
    fn comparison() {
        let expr = parse("a == b");
        assert!(matches!(
            expr.kind,
            ExprKind::Binary {
                op: BinaryOp::Eq,
                ..
            }
        ));
    }

    #[test]
    fn logical_and_or() {
        // `a or b and c` → `a or (b and c)` (and binds tighter)
        let expr = parse("a or b and c");
        match &expr.kind {
            ExprKind::Binary {
                op: BinaryOp::Or,
                rhs,
                ..
            } => {
                assert!(matches!(
                    rhs.kind,
                    ExprKind::Binary {
                        op: BinaryOp::And,
                        ..
                    }
                ));
            }
            _ => panic!("expected or at top level"),
        }
    }

    #[test]
    fn grouped_expr() {
        let expr = parse("(a + b)");
        assert!(matches!(expr.kind, ExprKind::Group(_)));
    }

    #[test]
    fn field_access() {
        let expr = parse("foo.bar");
        assert!(matches!(expr.kind, ExprKind::Field { ref field, .. } if field == "bar"));
    }

    #[test]
    fn index_expr() {
        let expr = parse("arr[0]");
        assert!(matches!(expr.kind, ExprKind::Index { .. }));
    }

    #[test]
    fn call_no_args() {
        let expr = parse("foo()");
        match &expr.kind {
            ExprKind::Call { args, .. } => assert!(args.is_empty()),
            _ => panic!("expected call"),
        }
    }

    #[test]
    fn call_with_args() {
        let expr = parse("foo(1, 2, 3)");
        match &expr.kind {
            ExprKind::Call { args, .. } => assert_eq!(args.len(), 3),
            _ => panic!("expected call"),
        }
    }

    #[test]
    fn struct_literal() {
        let expr = parse("Foo { x: 1, y: 2 }");
        match &expr.kind {
            ExprKind::StructLit { name, fields, .. } => {
                assert_eq!(name, "Foo");
                assert_eq!(fields.len(), 2);
            }
            _ => panic!("expected struct literal, got {:?}", expr.kind),
        }
    }

    #[test]
    fn struct_literal_disabled() {
        // With allow_struct_literals=false, `Foo { ... }` should NOT be a
        // struct literal — the Ident is parsed alone and `{` is left unconsumed.
        let expr = parse_no_struct("Foo { x: 1 }");
        assert!(matches!(expr.kind, ExprKind::Ident(ref s) if s == "Foo"));
    }

    #[test]
    fn chained_field_access() {
        let expr = parse("a.b.c");
        match &expr.kind {
            ExprKind::Field {
                expr: inner, field, ..
            } => {
                assert_eq!(field, "c");
                assert!(matches!(inner.kind, ExprKind::Field { ref field, .. } if field == "b"));
            }
            _ => panic!("expected field access"),
        }
    }

    #[test]
    fn deref_and_addrof() {
        assert!(matches!(
            parse("*p").kind,
            ExprKind::Unary {
                op: UnaryOp::Deref,
                ..
            }
        ));
        assert!(matches!(
            parse("&x").kind,
            ExprKind::Unary {
                op: UnaryOp::AddrOf,
                ..
            }
        ));
    }

    #[test]
    fn missing_operand_is_reported() {
        // `1 +` has no right operand: one error is recorded and the hole is
        // filled with an `Error` node so that an AST is still produced.
        let mut parser = Parser::new("1 +");
        let expr = parser.parse_expr(true);
        assert_eq!(parser.errors.len(), 1);
        match &expr.kind {
            ExprKind::Binary {
                op: BinaryOp::Add,
                rhs,
                ..
            } => assert!(matches!(rhs.kind, ExprKind::Error)),
            _ => panic!("expected binary add, got {:?}", expr.kind),
        }
    }
}