Add expression AST and Pratt parser with REPL
- ast.rs: Expr/ExprKind with UnaryOp, BinaryOp, StructField
- parser.rs: Pratt expression parser with allow_struct_literals flag, error recovery via dummy tokens, and 19 unit tests
- main.rs: interactive expression REPL (prints parsed AST)
This commit is contained in:
617
fluxc/src/parser.rs
Normal file
617
fluxc/src/parser.rs
Normal file
@@ -0,0 +1,617 @@
|
||||
use std::fmt;
|
||||
|
||||
use crate::{
|
||||
ast::{BinaryOp, Expr, ExprKind, StructField, UnaryOp},
|
||||
lexer::Lexer,
|
||||
token::{Span, Token, TokenKind},
|
||||
};
|
||||
|
||||
// ── Parse error ───────────────────────────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ParseError {
|
||||
pub span: Span,
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
impl fmt::Display for ParseError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "error at {}: {}", self.span, self.message)
|
||||
}
|
||||
}
|
||||
|
||||
// ── Binding powers ─────────────────────────────────────────────────────────────
|
||||
//
|
||||
// Returns `(left_bp, right_bp)` for infix operators.
|
||||
// left_bp < right_bp → left-associative
|
||||
// left_bp > right_bp → right-associative (none here)
|
||||
//
|
||||
// NOTE: comparison operators (==, !=, <, >, <=, >=) are not listed in the
|
||||
// GRAMMAR.ebnf precedence table but appear in examples; placed between
|
||||
// bitwise-AND (50) and additive (60) at 55.
|
||||
|
||||
fn infix_bp(kind: TokenKind) -> Option<(u8, u8)> {
|
||||
let bp = match kind {
|
||||
TokenKind::Or => (10, 11),
|
||||
TokenKind::And => (20, 21),
|
||||
TokenKind::Pipe => (30, 31),
|
||||
TokenKind::Caret => (40, 41),
|
||||
TokenKind::Amp => (50, 51),
|
||||
TokenKind::EqEq
|
||||
| TokenKind::BangEq
|
||||
| TokenKind::Lt
|
||||
| TokenKind::Gt
|
||||
| TokenKind::LtEq
|
||||
| TokenKind::GtEq => (55, 56),
|
||||
TokenKind::Plus | TokenKind::Minus => (60, 61),
|
||||
TokenKind::Star | TokenKind::Slash | TokenKind::Percent => (70, 71),
|
||||
// Postfix: `.`, `[`, `(` — handled separately in parse_led, bp listed
|
||||
// here only so callers can detect them as infix/postfix operators.
|
||||
TokenKind::Dot | TokenKind::LBracket | TokenKind::LParen => (90, 91),
|
||||
_ => return None,
|
||||
};
|
||||
Some(bp)
|
||||
}
|
||||
|
||||
// Returns the right binding power for prefix operators.
|
||||
fn prefix_bp(kind: TokenKind) -> Option<u8> {
|
||||
match kind {
|
||||
TokenKind::Bang
|
||||
| TokenKind::Tilde
|
||||
| TokenKind::Minus
|
||||
| TokenKind::Star
|
||||
| TokenKind::Amp => Some(80),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn token_to_unary_op(kind: TokenKind) -> UnaryOp {
|
||||
match kind {
|
||||
TokenKind::Minus => UnaryOp::Neg,
|
||||
TokenKind::Bang => UnaryOp::Not,
|
||||
TokenKind::Tilde => UnaryOp::BitNot,
|
||||
TokenKind::Star => UnaryOp::Deref,
|
||||
TokenKind::Amp => UnaryOp::AddrOf,
|
||||
_ => unreachable!("not a unary op: {:?}", kind),
|
||||
}
|
||||
}
|
||||
|
||||
fn token_to_binary_op(kind: TokenKind) -> BinaryOp {
|
||||
match kind {
|
||||
TokenKind::Or => BinaryOp::Or,
|
||||
TokenKind::And => BinaryOp::And,
|
||||
TokenKind::Pipe => BinaryOp::BitOr,
|
||||
TokenKind::Caret => BinaryOp::BitXor,
|
||||
TokenKind::Amp => BinaryOp::BitAnd,
|
||||
TokenKind::EqEq => BinaryOp::Eq,
|
||||
TokenKind::BangEq => BinaryOp::Ne,
|
||||
TokenKind::Lt => BinaryOp::Lt,
|
||||
TokenKind::Gt => BinaryOp::Gt,
|
||||
TokenKind::LtEq => BinaryOp::Le,
|
||||
TokenKind::GtEq => BinaryOp::Ge,
|
||||
TokenKind::Plus => BinaryOp::Add,
|
||||
TokenKind::Minus => BinaryOp::Sub,
|
||||
TokenKind::Star => BinaryOp::Mul,
|
||||
TokenKind::Slash => BinaryOp::Div,
|
||||
TokenKind::Percent => BinaryOp::Rem,
|
||||
_ => unreachable!("not a binary op: {:?}", kind),
|
||||
}
|
||||
}
|
||||
|
||||
// ── Parser ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
pub struct Parser<'src> {
|
||||
tokens: Vec<Token<'src>>,
|
||||
pos: usize,
|
||||
pub errors: Vec<ParseError>,
|
||||
}
|
||||
|
||||
impl<'src> Parser<'src> {
|
||||
pub fn new(src: &'src str) -> Self {
|
||||
let tokens = Lexer::new(src).tokenize();
|
||||
Self {
|
||||
tokens,
|
||||
pos: 0,
|
||||
errors: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
// ── Token access ──────────────────────────────────────────────────────────
|
||||
|
||||
fn current(&self) -> Token<'src> {
|
||||
self.tokens[self.pos]
|
||||
}
|
||||
|
||||
/// Advance past the current token and return it.
|
||||
fn advance(&mut self) -> Token<'src> {
|
||||
let tok = self.current();
|
||||
if tok.kind != TokenKind::Eof {
|
||||
self.pos += 1;
|
||||
}
|
||||
tok
|
||||
}
|
||||
|
||||
/// Consume the current token if it matches `kind`; otherwise record an
|
||||
/// error and return a zero-width dummy token at the current position
|
||||
/// so that parsing can continue.
|
||||
fn expect(&mut self, kind: TokenKind) -> Token<'src> {
|
||||
let tok = self.current();
|
||||
if tok.kind == kind {
|
||||
self.advance()
|
||||
} else {
|
||||
let span = Span::new(tok.span.start, tok.span.start);
|
||||
self.errors.push(ParseError {
|
||||
span,
|
||||
message: format!("expected {}, found {}", kind, tok.kind),
|
||||
});
|
||||
Token::new(kind, span, "")
|
||||
}
|
||||
}
|
||||
|
||||
// ── Public API ────────────────────────────────────────────────────────────
|
||||
|
||||
/// Parse a single expression.
|
||||
///
|
||||
/// `allow_struct_literals` controls whether a bare `Ident { … }` is
|
||||
/// parsed as a struct literal. Pass `false` in `if`/`while` conditions
|
||||
/// so that `{` is not consumed as a struct body.
|
||||
pub fn parse_expr(&mut self, allow_struct_literals: bool) -> Expr {
|
||||
self.pratt(0, allow_struct_literals)
|
||||
}
|
||||
|
||||
// ── Pratt core ────────────────────────────────────────────────────────────
|
||||
|
||||
fn pratt(&mut self, min_bp: u8, allow_struct_lit: bool) -> Expr {
|
||||
let mut lhs = self.parse_nud(allow_struct_lit);
|
||||
|
||||
loop {
|
||||
let op_tok = self.current();
|
||||
|
||||
// Struct literal: `Ident {` — only when the flag is set, and only
|
||||
// when the lhs is a bare identifier.
|
||||
if allow_struct_lit
|
||||
&& op_tok.kind == TokenKind::LCurly
|
||||
&& matches!(lhs.kind, ExprKind::Ident(_))
|
||||
&& min_bp == 0
|
||||
{
|
||||
lhs = self.parse_struct_lit(lhs);
|
||||
continue;
|
||||
}
|
||||
|
||||
let (l_bp, r_bp) = match infix_bp(op_tok.kind) {
|
||||
Some(bp) => bp,
|
||||
None => break,
|
||||
};
|
||||
|
||||
if l_bp < min_bp {
|
||||
break;
|
||||
}
|
||||
|
||||
lhs = self.parse_led(lhs, op_tok, r_bp, allow_struct_lit);
|
||||
}
|
||||
|
||||
lhs
|
||||
}
|
||||
|
||||
// ── Null denotation (prefix / primary) ───────────────────────────────────
|
||||
|
||||
fn parse_nud(&mut self, allow_struct_lit: bool) -> Expr {
|
||||
let tok = self.advance();
|
||||
match tok.kind {
|
||||
// Literals
|
||||
TokenKind::IntLit => Expr::new(ExprKind::IntLit(tok.text.to_owned()), tok.span),
|
||||
TokenKind::FloatLit => Expr::new(ExprKind::FloatLit(tok.text.to_owned()), tok.span),
|
||||
TokenKind::StringLit => Expr::new(ExprKind::StringLit(tok.text.to_owned()), tok.span),
|
||||
TokenKind::CharLit => Expr::new(ExprKind::CharLit(tok.text.to_owned()), tok.span),
|
||||
TokenKind::True => Expr::new(ExprKind::Bool(true), tok.span),
|
||||
TokenKind::False => Expr::new(ExprKind::Bool(false), tok.span),
|
||||
|
||||
// Identifier
|
||||
TokenKind::Ident => Expr::new(ExprKind::Ident(tok.text.to_owned()), tok.span),
|
||||
|
||||
// Prefix unary
|
||||
kind if prefix_bp(kind).is_some() => {
|
||||
let r_bp = prefix_bp(kind).unwrap();
|
||||
let op = token_to_unary_op(kind);
|
||||
let operand = self.pratt(r_bp, allow_struct_lit);
|
||||
let span = tok.span.cover(operand.span);
|
||||
Expr::new(
|
||||
ExprKind::Unary {
|
||||
op,
|
||||
op_span: tok.span,
|
||||
expr: Box::new(operand),
|
||||
},
|
||||
span,
|
||||
)
|
||||
}
|
||||
|
||||
// Grouped expression
|
||||
TokenKind::LParen => {
|
||||
// Inside parentheses struct literals are always allowed.
|
||||
let inner = self.pratt(0, true);
|
||||
let close = self.expect(TokenKind::RParen);
|
||||
let span = tok.span.cover(close.span);
|
||||
Expr::new(ExprKind::Group(Box::new(inner)), span)
|
||||
}
|
||||
|
||||
// Error recovery
|
||||
_ => {
|
||||
self.errors.push(ParseError {
|
||||
span: tok.span,
|
||||
message: format!("unexpected token {} in expression", tok.kind),
|
||||
});
|
||||
Expr::new(ExprKind::Error, tok.span)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Left denotation (infix / postfix) ────────────────────────────────────
|
||||
|
||||
fn parse_led(
|
||||
&mut self,
|
||||
lhs: Expr,
|
||||
op_tok: Token<'src>,
|
||||
r_bp: u8,
|
||||
allow_struct_lit: bool,
|
||||
) -> Expr {
|
||||
// Consume the operator token.
|
||||
self.advance();
|
||||
|
||||
match op_tok.kind {
|
||||
// Field access: `expr.field`
|
||||
TokenKind::Dot => {
|
||||
let field_tok = self.expect(TokenKind::Ident);
|
||||
let span = lhs.span.cover(field_tok.span);
|
||||
Expr::new(
|
||||
ExprKind::Field {
|
||||
expr: Box::new(lhs),
|
||||
field: field_tok.text.to_owned(),
|
||||
field_span: field_tok.span,
|
||||
},
|
||||
span,
|
||||
)
|
||||
}
|
||||
|
||||
// Index: `expr[index]`
|
||||
TokenKind::LBracket => {
|
||||
// Inside brackets struct literals are always allowed.
|
||||
let index = self.pratt(0, true);
|
||||
let close = self.expect(TokenKind::RBracket);
|
||||
let span = lhs.span.cover(close.span);
|
||||
Expr::new(
|
||||
ExprKind::Index {
|
||||
expr: Box::new(lhs),
|
||||
index: Box::new(index),
|
||||
},
|
||||
span,
|
||||
)
|
||||
}
|
||||
|
||||
// Call: `expr(args…)`
|
||||
TokenKind::LParen => {
|
||||
let (args, close_span) = self.parse_arg_list();
|
||||
let span = lhs.span.cover(close_span);
|
||||
Expr::new(
|
||||
ExprKind::Call {
|
||||
callee: Box::new(lhs),
|
||||
args,
|
||||
},
|
||||
span,
|
||||
)
|
||||
}
|
||||
|
||||
// Binary operator
|
||||
kind => {
|
||||
let op = token_to_binary_op(kind);
|
||||
let rhs = self.pratt(r_bp, allow_struct_lit);
|
||||
let span = lhs.span.cover(rhs.span);
|
||||
Expr::new(
|
||||
ExprKind::Binary {
|
||||
op,
|
||||
op_span: op_tok.span,
|
||||
lhs: Box::new(lhs),
|
||||
rhs: Box::new(rhs),
|
||||
},
|
||||
span,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Struct literal ────────────────────────────────────────────────────────
|
||||
|
||||
/// Called after we have already parsed the leading `Ident` as `lhs` and
|
||||
/// the current token is `{`.
|
||||
fn parse_struct_lit(&mut self, name_expr: Expr) -> Expr {
|
||||
let (name, name_span) = match name_expr.kind {
|
||||
ExprKind::Ident(ref s) => (s.clone(), name_expr.span),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
self.advance(); // consume `{`
|
||||
|
||||
let fields = self.parse_struct_field_list();
|
||||
|
||||
let close = self.expect(TokenKind::RCurly);
|
||||
let span = name_span.cover(close.span);
|
||||
Expr::new(
|
||||
ExprKind::StructLit {
|
||||
name,
|
||||
name_span,
|
||||
fields,
|
||||
},
|
||||
span,
|
||||
)
|
||||
}
|
||||
|
||||
fn parse_struct_field_list(&mut self) -> Vec<StructField> {
|
||||
let mut fields = Vec::new();
|
||||
loop {
|
||||
if matches!(self.current().kind, TokenKind::RCurly | TokenKind::Eof) {
|
||||
break;
|
||||
}
|
||||
fields.push(self.parse_struct_field());
|
||||
if self.current().kind == TokenKind::Comma {
|
||||
self.advance();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
fields
|
||||
}
|
||||
|
||||
fn parse_struct_field(&mut self) -> StructField {
|
||||
let name_tok = self.expect(TokenKind::Ident);
|
||||
self.expect(TokenKind::Colon);
|
||||
// Struct literals allowed inside field values.
|
||||
let value = self.pratt(0, true);
|
||||
StructField {
|
||||
name: name_tok.text.to_owned(),
|
||||
name_span: name_tok.span,
|
||||
value,
|
||||
}
|
||||
}
|
||||
|
||||
// ── Argument list ─────────────────────────────────────────────────────────
|
||||
|
||||
/// Parse `arg, arg, …` up to `)`. The opening `(` has already been
|
||||
/// consumed by `parse_led`. Returns `(args, close_span)`.
|
||||
fn parse_arg_list(&mut self) -> (Vec<Expr>, Span) {
|
||||
let mut args = Vec::new();
|
||||
loop {
|
||||
if matches!(self.current().kind, TokenKind::RParen | TokenKind::Eof) {
|
||||
break;
|
||||
}
|
||||
// Struct literals allowed inside argument lists.
|
||||
args.push(self.pratt(0, true));
|
||||
if self.current().kind == TokenKind::Comma {
|
||||
self.advance();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
let close = self.expect(TokenKind::RParen);
|
||||
(args, close.span)
|
||||
}
|
||||
}
|
||||
|
||||
// ── Tests ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `src` as one expression with struct literals enabled.
    fn parse(src: &str) -> Expr {
        let mut parser = Parser::new(src);
        parser.parse_expr(true)
    }

    /// Parses `src` as one expression with struct literals disabled.
    fn parse_no_struct(src: &str) -> Expr {
        let mut parser = Parser::new(src);
        parser.parse_expr(false)
    }

    // ── Primary expressions ──────────────────────────────────────────────────

    #[test]
    fn int_literal() {
        let kind = parse("42").kind;
        assert!(matches!(kind, ExprKind::IntLit(ref s) if s == "42"));
    }

    #[test]
    fn float_literal() {
        let kind = parse("3.14").kind;
        assert!(matches!(kind, ExprKind::FloatLit(ref s) if s == "3.14"));
    }

    #[test]
    fn bool_literals() {
        let yes = parse("true");
        let no = parse("false");
        assert!(matches!(yes.kind, ExprKind::Bool(true)));
        assert!(matches!(no.kind, ExprKind::Bool(false)));
    }

    #[test]
    fn ident() {
        let kind = parse("foo").kind;
        assert!(matches!(kind, ExprKind::Ident(ref s) if s == "foo"));
    }

    // ── Unary operators ──────────────────────────────────────────────────────

    #[test]
    fn unary_neg() {
        let kind = parse("-42").kind;
        assert!(matches!(kind, ExprKind::Unary { op: UnaryOp::Neg, .. }));
    }

    #[test]
    fn unary_not() {
        let kind = parse("!x").kind;
        assert!(matches!(kind, ExprKind::Unary { op: UnaryOp::Not, .. }));
    }

    // ── Binary operators ─────────────────────────────────────────────────────

    #[test]
    fn binary_add() {
        let kind = parse("a + b").kind;
        assert!(matches!(kind, ExprKind::Binary { op: BinaryOp::Add, .. }));
    }

    #[test]
    fn binary_precedence() {
        // `a + b * c` should parse as `a + (b * c)`
        let expr = parse("a + b * c");
        if let ExprKind::Binary {
            op: BinaryOp::Add,
            lhs,
            rhs,
            ..
        } = &expr.kind
        {
            assert!(matches!(lhs.kind, ExprKind::Ident(ref s) if s == "a"));
            assert!(matches!(rhs.kind, ExprKind::Binary { op: BinaryOp::Mul, .. }));
        } else {
            panic!("expected binary add, got {:?}", expr.kind);
        }
    }

    #[test]
    fn comparison() {
        let kind = parse("a == b").kind;
        assert!(matches!(kind, ExprKind::Binary { op: BinaryOp::Eq, .. }));
    }

    #[test]
    fn logical_and_or() {
        // `a or b and c` → `a or (b and c)` (and binds tighter)
        let expr = parse("a or b and c");
        if let ExprKind::Binary {
            op: BinaryOp::Or,
            rhs,
            ..
        } = &expr.kind
        {
            assert!(matches!(rhs.kind, ExprKind::Binary { op: BinaryOp::And, .. }));
        } else {
            panic!("expected or at top level");
        }
    }

    // ── Grouping and postfix forms ───────────────────────────────────────────

    #[test]
    fn grouped_expr() {
        let kind = parse("(a + b)").kind;
        assert!(matches!(kind, ExprKind::Group(_)));
    }

    #[test]
    fn field_access() {
        let kind = parse("foo.bar").kind;
        assert!(matches!(kind, ExprKind::Field { ref field, .. } if field == "bar"));
    }

    #[test]
    fn index_expr() {
        let kind = parse("arr[0]").kind;
        assert!(matches!(kind, ExprKind::Index { .. }));
    }

    #[test]
    fn call_no_args() {
        let expr = parse("foo()");
        if let ExprKind::Call { args, .. } = &expr.kind {
            assert!(args.is_empty());
        } else {
            panic!("expected call");
        }
    }

    #[test]
    fn call_with_args() {
        let expr = parse("foo(1, 2, 3)");
        if let ExprKind::Call { args, .. } = &expr.kind {
            assert_eq!(args.len(), 3);
        } else {
            panic!("expected call");
        }
    }

    // ── Struct literals ──────────────────────────────────────────────────────

    #[test]
    fn struct_literal() {
        let expr = parse("Foo { x: 1, y: 2 }");
        if let ExprKind::StructLit { name, fields, .. } = &expr.kind {
            assert_eq!(name, "Foo");
            assert_eq!(fields.len(), 2);
        } else {
            panic!("expected struct literal, got {:?}", expr.kind);
        }
    }

    #[test]
    fn struct_literal_disabled() {
        // With allow_struct_literals=false, `Foo { ... }` should NOT be a
        // struct literal — the Ident is parsed alone and `{` is left unconsumed.
        let kind = parse_no_struct("Foo { x: 1 }").kind;
        assert!(matches!(kind, ExprKind::Ident(ref s) if s == "Foo"));
    }

    #[test]
    fn chained_field_access() {
        // `a.b.c` nests to the left: the outer node is `.c`, its operand `.b`.
        let expr = parse("a.b.c");
        if let ExprKind::Field {
            expr: inner, field, ..
        } = &expr.kind
        {
            assert_eq!(field, "c");
            assert!(matches!(inner.kind, ExprKind::Field { ref field, .. } if field == "b"));
        } else {
            panic!("expected field access");
        }
    }

    #[test]
    fn deref_and_addrof() {
        let deref = parse("*p").kind;
        assert!(matches!(deref, ExprKind::Unary { op: UnaryOp::Deref, .. }));

        let addr_of = parse("&x").kind;
        assert!(matches!(addr_of, ExprKind::Unary { op: UnaryOp::AddrOf, .. }));
    }
}
|
||||
Reference in New Issue
Block a user