feat: Add Expression definition and parsing logic.

This commit is contained in:
Jooris Hadeler
2026-01-12 16:47:52 +01:00
parent 0599a5fb98
commit 2170be5204
4 changed files with 269 additions and 15 deletions

60
src/ast.rs Normal file
View File

@@ -0,0 +1,60 @@
use crate::token::Span;
/// A node of the expression tree: what the expression is plus where it was
/// found in the source text.
#[derive(Debug, PartialEq, Eq)]
pub struct Expression {
    /// The concrete form of this expression, including any operands.
    pub kind: ExpressionKind,
    /// Source location associated with this expression.
    pub span: Span,
}
/// The different forms an [`Expression`] can take.
#[derive(Debug, PartialEq, Eq)]
pub enum ExpressionKind {
    /// An integer literal, stored as its parsed value.
    Integer(u64),
    /// A boolean literal.
    Boolean(bool),
    /// An identifier, stored as an owned copy of its text.
    Identifier(Box<str>),
    /// A prefix operator applied to a single operand.
    Unary {
        /// Which unary operator is applied.
        op: UnaryOp,
        /// Source location of the operator token itself.
        op_span: Span,
        /// The operand the operator is applied to.
        expr: Box<Expression>,
    },
    /// An infix operator applied to two operands.
    Binary {
        /// Which binary operator is applied.
        op: BinaryOp,
        /// Source location of the operator token itself.
        op_span: Span,
        /// The operand to the left of the operator.
        left: Box<Expression>,
        /// The operand to the right of the operator.
        right: Box<Expression>,
    },
}
/// Prefix operators that take a single operand.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UnaryOp {
    /// Logical negation (the `not` keyword).
    LogicalNot,
    /// Bitwise complement (the `~` token).
    BitwiseNot,
    /// Arithmetic negation (a prefix minus token).
    Negate,
}
/// Infix operators that take a left and a right operand.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BinaryOp {
    // Arithmetic.
    /// Addition.
    Add,
    /// Subtraction.
    Subtract,
    /// Multiplication.
    Multiply,
    /// Division.
    Divide,
    /// Remainder.
    Remainder,

    // Bitwise.
    /// Bitwise AND (`&`).
    BitwiseAnd,
    /// Bitwise OR (`|`).
    BitwiseOr,
    /// Bitwise XOR (`^`).
    BitwiseXor,

    // Logical.
    /// Logical conjunction (the `and` keyword).
    LogicalAnd,
    /// Logical disjunction (the `or` keyword).
    LogicalOr,

    // Comparison.
    /// Equality (`==`).
    Equal,
    /// Inequality (`!=`).
    Unequal,
    /// Strictly less than (`<`).
    LessThan,
    /// Less than or equal (`<=`).
    LessThanOrEqual,
    /// Strictly greater than (`>`).
    GreaterThan,
    /// Greater than or equal (`>=`).
    GreaterThanOrEqual,

    // Other.
    /// Assignment (`=`).
    Assign,
    /// Member access (`.`).
    Member,
}

View File

@@ -1,11 +1,11 @@
use crate::token::Tokenizer; use crate::parser::Parser;
pub mod ast;
pub mod parser;
pub mod token; pub mod token;
fn main() { fn main() {
let input = include_str!("../example/main.bky"); let mut parser = Parser::new("user.age >= 18");
for token in Tokenizer::new(input) { println!("{:#?}", parser.parse_expression(0));
println!("{token:?}");
}
} }

185
src/parser.rs Normal file
View File

@@ -0,0 +1,185 @@
use multipeek::{IteratorExt, MultiPeek};
use crate::{
ast::*,
token::{Span, Token, TokenKind, Tokenizer},
};
/// Errors the parser can report.
#[derive(Debug)]
pub enum ParserError {
    /// A token was encountered that cannot appear at the current position.
    UnexpectedToken {
        /// The token kinds that would have been accepted here.
        expected: &'static [TokenKind],
        /// The kind of the token that was actually present.
        found: TokenKind,
        /// Source location of the offending token.
        span: Span,
    },
    /// The token stream ended while another token was still required.
    UnexpectedEof,
}
/// Result alias used by the parser: success carries `T`, failure a [`ParserError`].
pub type ParserResult<T> = std::result::Result<T, ParserError>;
/// Expression parser that reads tokens from a [`Tokenizer`] over the source
/// text borrowed for `'src`.
pub struct Parser<'src> {
    /// Token stream wrapped so that upcoming tokens can be inspected before
    /// they are consumed.
    tokens: MultiPeek<Tokenizer<'src>>,
}
impl<'src> Parser<'src> {
pub fn new(input: &'src str) -> Self {
Self {
tokens: Tokenizer::new(input).multipeek(),
}
}
fn peek(&mut self) -> Option<Token<'src>> {
self.tokens.peek().copied()
}
fn peek_no_eof(&mut self) -> ParserResult<Token<'src>> {
self.peek().ok_or(ParserError::UnexpectedEof)
}
fn consume(&mut self) -> Option<Token<'src>> {
self.tokens.next()
}
pub fn parse_expression(&mut self, min_binding_power: u8) -> ParserResult<Expression> {
let mut left = self.parse_leading_expression()?;
loop {
let Some(peek) = self.peek() else {
break;
};
if let Some((left_binding_power, right_binding_power, op)) =
infix_binding_power(peek.kind)
{
if left_binding_power < min_binding_power {
break;
}
// consume operator token
self.consume();
let right = self.parse_expression(right_binding_power)?;
let span = Span::new(left.span.start, right.span.end);
left = Expression {
kind: ExpressionKind::Binary {
op,
op_span: peek.span,
left: Box::new(left),
right: Box::new(right),
},
span,
};
}
}
Ok(left)
}
fn parse_leading_expression(&mut self) -> ParserResult<Expression> {
let peek = self.peek_no_eof()?;
match peek.kind {
TokenKind::Identifier => {
self.consume();
let text = peek.text.to_string().into_boxed_str();
Ok(Expression {
kind: ExpressionKind::Identifier(text),
span: peek.span,
})
}
TokenKind::Integer => {
self.consume();
let value = peek.text.parse::<u64>().unwrap();
Ok(Expression {
kind: ExpressionKind::Integer(value),
span: peek.span,
})
}
TokenKind::Boolean => {
self.consume();
let value = peek.text == "true";
Ok(Expression {
kind: ExpressionKind::Boolean(value),
span: peek.span,
})
}
_ => {
if let Some((right_binding_power, op)) = prefix_binding_power(peek.kind) {
self.consume();
let op_span = peek.span;
let expr = self.parse_expression(right_binding_power)?;
Ok(Expression {
kind: ExpressionKind::Unary {
op,
op_span,
expr: Box::new(expr),
},
span: peek.span,
})
} else {
Err(ParserError::UnexpectedToken {
expected: &[
TokenKind::Identifier,
TokenKind::Integer,
TokenKind::Boolean,
TokenKind::KwNot,
TokenKind::Tilde,
TokenKind::Minus,
],
found: peek.kind,
span: peek.span,
})
}
}
}
}
}
/// Looks up the prefix operator introduced by a token of the given `kind`.
///
/// Returns the binding power to use when parsing the operand together with
/// the matching [`UnaryOp`], or `None` if `kind` is not a prefix operator.
fn prefix_binding_power(kind: TokenKind) -> Option<(u8, UnaryOp)> {
    // Update `parse_leading_expression` when adding more operators.

    // Every prefix operator currently shares the same binding power.
    const PREFIX_POWER: u8 = 28;

    let op = match kind {
        TokenKind::KwNot => UnaryOp::LogicalNot,
        TokenKind::Tilde => UnaryOp::BitwiseNot,
        TokenKind::Minus => UnaryOp::Negate,
        _ => return None,
    };

    Some((PREFIX_POWER, op))
}
/// Looks up the infix operator introduced by a token of the given `kind`.
///
/// Returns `(left_binding_power, right_binding_power, operator)`, or `None`
/// if `kind` is not an infix operator. A right power above the left power
/// makes the operator left-associative; assignment has the lower right power
/// and is therefore right-associative.
fn infix_binding_power(kind: TokenKind) -> Option<(u8, u8, BinaryOp)> {
    let (op, left_power, right_power) = match kind {
        // Member access binds tightest.
        TokenKind::Dot => (BinaryOp::Member, 30, 31),

        // Multiplicative.
        TokenKind::Asterisk => (BinaryOp::Multiply, 26, 27),
        TokenKind::Slash => (BinaryOp::Divide, 26, 27),
        TokenKind::Percent => (BinaryOp::Remainder, 26, 27),

        // Additive.
        TokenKind::Plus => (BinaryOp::Add, 24, 25),
        TokenKind::Minus => (BinaryOp::Subtract, 24, 25),

        // Relational.
        TokenKind::LessThan => (BinaryOp::LessThan, 20, 21),
        TokenKind::LessThanOrEqual => (BinaryOp::LessThanOrEqual, 20, 21),
        TokenKind::GreaterThan => (BinaryOp::GreaterThan, 20, 21),
        TokenKind::GreaterThanOrEqual => (BinaryOp::GreaterThanOrEqual, 20, 21),

        // Equality.
        TokenKind::Equal => (BinaryOp::Equal, 18, 19),
        TokenKind::Unequal => (BinaryOp::Unequal, 18, 19),

        // Bitwise, from tightest to loosest.
        TokenKind::Ampersand => (BinaryOp::BitwiseAnd, 16, 17),
        TokenKind::Caret => (BinaryOp::BitwiseXor, 14, 15),
        TokenKind::Pipe => (BinaryOp::BitwiseOr, 12, 13),

        // Logical.
        TokenKind::KwAnd => (BinaryOp::LogicalAnd, 10, 11),
        TokenKind::KwOr => (BinaryOp::LogicalOr, 8, 9),

        // Assignment binds loosest.
        TokenKind::Assign => (BinaryOp::Assign, 5, 4),

        _ => return None,
    };

    Some((left_power, right_power, op))
}

View File

@@ -47,6 +47,10 @@ pub enum TokenKind {
KwBreak, KwBreak,
KwReturn, KwReturn,
KwNot,
KwAnd,
KwOr,
Identifier, Identifier,
Integer, Integer,
Boolean, Boolean,
@@ -59,7 +63,7 @@ pub enum TokenKind {
Ampersand, Ampersand,
Pipe, Pipe,
Caret, Caret,
Bang, Tilde,
Equal, Equal,
Unequal, Unequal,
@@ -184,6 +188,10 @@ impl<'src> Tokenizer<'src> {
"break" => TokenKind::KwBreak, "break" => TokenKind::KwBreak,
"return" => TokenKind::KwReturn, "return" => TokenKind::KwReturn,
"or" => TokenKind::KwOr,
"and" => TokenKind::KwAnd,
"not" => TokenKind::KwNot,
_ => TokenKind::Identifier, _ => TokenKind::Identifier,
}; };
@@ -223,6 +231,7 @@ impl<'src> Tokenizer<'src> {
'&' => single!(TokenKind::Ampersand), '&' => single!(TokenKind::Ampersand),
'|' => single!(TokenKind::Pipe), '|' => single!(TokenKind::Pipe),
'^' => single!(TokenKind::Caret), '^' => single!(TokenKind::Caret),
'~' => single!(TokenKind::Tilde),
'.' => single!(TokenKind::Dot), '.' => single!(TokenKind::Dot),
',' => single!(TokenKind::Comma), ',' => single!(TokenKind::Comma),
@@ -230,42 +239,42 @@ impl<'src> Tokenizer<'src> {
';' => single!(TokenKind::Semicolon), ';' => single!(TokenKind::Semicolon),
'=' => { '=' => {
if self.peek() == Some('=') {
self.consume(); self.consume();
if self.peek() == Some('=') {
self.consume(); self.consume();
TokenKind::Equal TokenKind::Equal
} else { } else {
self.consume();
TokenKind::Assign TokenKind::Assign
} }
} }
'!' => { '!' => {
if self.peek() == Some('=') {
self.consume(); self.consume();
if self.peek() == Some('=') {
self.consume(); self.consume();
TokenKind::Unequal TokenKind::Unequal
} else { } else {
self.consume(); TokenKind::InvalidCharacter
TokenKind::Bang
} }
} }
'<' => { '<' => {
if self.peek() == Some('=') {
self.consume(); self.consume();
if self.peek() == Some('=') {
self.consume(); self.consume();
TokenKind::LessThanOrEqual TokenKind::LessThanOrEqual
} else { } else {
self.consume();
TokenKind::LessThan TokenKind::LessThan
} }
} }
'>' => { '>' => {
if self.peek() == Some('=') {
self.consume(); self.consume();
if self.peek() == Some('=') {
self.consume(); self.consume();
TokenKind::GreaterThanOrEqual TokenKind::GreaterThanOrEqual
} else { } else {
self.consume();
TokenKind::GreaterThan TokenKind::GreaterThan
} }
} }