diff --git a/src/ast.rs b/src/ast.rs
new file mode 100644
index 0000000..ec6f86f
--- /dev/null
+++ b/src/ast.rs
@@ -0,0 +1,64 @@
+use crate::token::Span;
+
+/// An expression node together with the source span it covers.
+#[derive(Debug, PartialEq, Eq)]
+pub struct Expression {
+    pub kind: ExpressionKind,
+    pub span: Span,
+}
+
+/// The different shapes an [`Expression`] can take.
+#[derive(Debug, PartialEq, Eq)]
+pub enum ExpressionKind {
+    Integer(u64),
+    Boolean(bool),
+    Identifier(Box<str>),
+
+    Unary {
+        op: UnaryOp,
+        op_span: Span,
+        expr: Box<Expression>,
+    },
+
+    Binary {
+        op: BinaryOp,
+        op_span: Span,
+        left: Box<Expression>,
+        right: Box<Expression>,
+    },
+}
+
+/// Prefix operators.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum UnaryOp {
+    LogicalNot,
+    BitwiseNot,
+    Negate,
+}
+
+/// Infix operators.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum BinaryOp {
+    Add,
+    Subtract,
+    Multiply,
+    Divide,
+    Remainder,
+
+    BitwiseAnd,
+    BitwiseOr,
+    BitwiseXor,
+
+    LogicalAnd,
+    LogicalOr,
+
+    Equal,
+    Unequal,
+    LessThan,
+    LessThanOrEqual,
+    GreaterThan,
+    GreaterThanOrEqual,
+
+    Assign,
+    Member,
+}
diff --git a/src/main.rs b/src/main.rs
index aa44cdb..8ff526e 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,11 +1,11 @@
-use crate::token::Tokenizer;
+use crate::parser::Parser;
 
+pub mod ast;
+pub mod parser;
 pub mod token;
 
 fn main() {
-    let input = include_str!("../example/main.bky");
+    let mut parser = Parser::new("user.age >= 18");
 
-    for token in Tokenizer::new(input) {
-        println!("{token:?}");
-    }
+    println!("{:#?}", parser.parse_expression(0));
 }
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..2c66e2a
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,215 @@
+//! Expression parser driven by operator binding powers.
+
+use multipeek::{IteratorExt, MultiPeek};
+
+use crate::{
+    ast::*,
+    token::{Span, Token, TokenKind, Tokenizer},
+};
+
+/// Errors produced while parsing.
+#[derive(Debug)]
+pub enum ParserError {
+    /// A token was found that cannot start an expression at this position.
+    UnexpectedToken {
+        expected: &'static [TokenKind],
+        found: TokenKind,
+        span: Span,
+    },
+    /// The input ended where an expression was required.
+    UnexpectedEof,
+    /// An integer literal could not be parsed as a `u64` (e.g. it overflows).
+    InvalidInteger { span: Span },
+}
+
+pub type ParserResult<T> = std::result::Result<T, ParserError>;
+
+/// Parses expressions from source text.
+pub struct Parser<'src> {
+    tokens: MultiPeek<Tokenizer<'src>>,
+}
+
+impl<'src> Parser<'src> {
+    /// Creates a parser over the tokens of `input`.
+    pub fn new(input: &'src str) -> Self {
+        Self {
+            tokens: Tokenizer::new(input).multipeek(),
+        }
+    }
+
+    /// Returns the next token without consuming it, or `None` at end of input.
+    fn peek(&mut self) -> Option<Token<'src>> {
+        self.tokens.peek().copied()
+    }
+
+    /// Like [`Self::peek`], but treats end of input as an error.
+    fn peek_no_eof(&mut self) -> ParserResult<Token<'src>> {
+        self.peek().ok_or(ParserError::UnexpectedEof)
+    }
+
+    /// Consumes and returns the next token.
+    fn consume(&mut self) -> Option<Token<'src>> {
+        self.tokens.next()
+    }
+
+    /// Parses an expression whose infix operators all have a left binding
+    /// power of at least `min_binding_power`; pass `0` for a full expression.
+    ///
+    /// Parsing stops (without consuming) at the first token that is not an
+    /// infix operator or whose left binding power is below the minimum.
+    pub fn parse_expression(&mut self, min_binding_power: u8) -> ParserResult<Expression> {
+        let mut left = self.parse_leading_expression()?;
+
+        // Stop at end of input or at any token that is not an infix operator.
+        // Without the second exit the loop would spin forever on input such
+        // as `a b`, since the peeked token is never consumed.
+        while let Some(peek) = self.peek() {
+            let Some((left_binding_power, right_binding_power, op)) =
+                infix_binding_power(peek.kind)
+            else {
+                break;
+            };
+
+            if left_binding_power < min_binding_power {
+                break;
+            }
+
+            // consume operator token
+            self.consume();
+
+            let right = self.parse_expression(right_binding_power)?;
+            let span = Span::new(left.span.start, right.span.end);
+
+            left = Expression {
+                kind: ExpressionKind::Binary {
+                    op,
+                    op_span: peek.span,
+                    left: Box::new(left),
+                    right: Box::new(right),
+                },
+                span,
+            };
+        }
+
+        Ok(left)
+    }
+
+    /// Parses a literal, an identifier, or a prefix-operator expression.
+    fn parse_leading_expression(&mut self) -> ParserResult<Expression> {
+        let peek = self.peek_no_eof()?;
+
+        match peek.kind {
+            TokenKind::Identifier => {
+                self.consume();
+
+                let text = peek.text.to_string().into_boxed_str();
+                Ok(Expression {
+                    kind: ExpressionKind::Identifier(text),
+                    span: peek.span,
+                })
+            }
+            TokenKind::Integer => {
+                self.consume();
+
+                // A literal too large for `u64` is a user error, not a bug.
+                let value = peek
+                    .text
+                    .parse::<u64>()
+                    .map_err(|_| ParserError::InvalidInteger { span: peek.span })?;
+                Ok(Expression {
+                    kind: ExpressionKind::Integer(value),
+                    span: peek.span,
+                })
+            }
+            TokenKind::Boolean => {
+                self.consume();
+
+                let value = peek.text == "true";
+                Ok(Expression {
+                    kind: ExpressionKind::Boolean(value),
+                    span: peek.span,
+                })
+            }
+            _ => {
+                if let Some((right_binding_power, op)) = prefix_binding_power(peek.kind) {
+                    self.consume();
+
+                    let op_span = peek.span;
+                    let expr = self.parse_expression(right_binding_power)?;
+                    // Cover the operator and its operand, not just the operator.
+                    let span = Span::new(op_span.start, expr.span.end);
+
+                    Ok(Expression {
+                        kind: ExpressionKind::Unary {
+                            op,
+                            op_span,
+                            expr: Box::new(expr),
+                        },
+                        span,
+                    })
+                } else {
+                    Err(ParserError::UnexpectedToken {
+                        expected: &[
+                            TokenKind::Identifier,
+                            TokenKind::Integer,
+                            TokenKind::Boolean,
+                            TokenKind::KwNot,
+                            TokenKind::Tilde,
+                            TokenKind::Minus,
+                        ],
+                        found: peek.kind,
+                        span: peek.span,
+                    })
+                }
+            }
+        }
+    }
+}
+
+/// Returns the right binding power and operator for a prefix-operator token.
+fn prefix_binding_power(kind: TokenKind) -> Option<(u8, UnaryOp)> {
+    // Update `parse_leading_expression` when adding more operators.
+    Some(match kind {
+        TokenKind::KwNot => (28, UnaryOp::LogicalNot),
+        TokenKind::Tilde => (28, UnaryOp::BitwiseNot),
+        TokenKind::Minus => (28, UnaryOp::Negate),
+
+        _ => return None,
+    })
+}
+
+/// Returns `(left, right)` binding powers and the operator for an infix token.
+///
+/// `left < right` makes an operator left-associative; `Assign` reverses the
+/// pair and is therefore right-associative.
+fn infix_binding_power(kind: TokenKind) -> Option<(u8, u8, BinaryOp)> {
+    Some(match kind {
+        TokenKind::Dot => (30, 31, BinaryOp::Member),
+
+        TokenKind::Asterisk => (26, 27, BinaryOp::Multiply),
+        TokenKind::Slash => (26, 27, BinaryOp::Divide),
+        TokenKind::Percent => (26, 27, BinaryOp::Remainder),
+
+        TokenKind::Plus => (24, 25, BinaryOp::Add),
+        TokenKind::Minus => (24, 25, BinaryOp::Subtract),
+
+        TokenKind::LessThan => (20, 21, BinaryOp::LessThan),
+        TokenKind::LessThanOrEqual => (20, 21, BinaryOp::LessThanOrEqual),
+        TokenKind::GreaterThan => (20, 21, BinaryOp::GreaterThan),
+        TokenKind::GreaterThanOrEqual => (20, 21, BinaryOp::GreaterThanOrEqual),
+
+        TokenKind::Equal => (18, 19, BinaryOp::Equal),
+        TokenKind::Unequal => (18, 19, BinaryOp::Unequal),
+
+        TokenKind::Ampersand => (16, 17, BinaryOp::BitwiseAnd),
+        TokenKind::Caret => (14, 15, BinaryOp::BitwiseXor),
+        TokenKind::Pipe => (12, 13, BinaryOp::BitwiseOr),
+
+        TokenKind::KwAnd => (10, 11, BinaryOp::LogicalAnd),
+        TokenKind::KwOr => (8, 9, BinaryOp::LogicalOr),
+
+        TokenKind::Assign => (5, 4, BinaryOp::Assign),
+
+        _ => return None,
+    })
+}
diff --git a/src/token.rs b/src/token.rs
index 5fab5c0..5df54c6 100644
--- a/src/token.rs
+++ b/src/token.rs
@@ -47,6 +47,10 @@ pub enum TokenKind {
     KwBreak,
     KwReturn,
 
+    KwNot,
+    KwAnd,
+    KwOr,
+
     Identifier,
     Integer,
     Boolean,
@@ -59,7 +63,7 @@ pub enum TokenKind {
     Ampersand,
     Pipe,
     Caret,
-    Bang,
+    Tilde,
 
     Equal,
     Unequal,
@@ -184,6 +188,10 @@ impl<'src> Tokenizer<'src> {
                 "break" => TokenKind::KwBreak,
                 "return" => TokenKind::KwReturn,
 
+                "or" => TokenKind::KwOr,
+                "and" => TokenKind::KwAnd,
+                "not" => TokenKind::KwNot,
+
                 _ => TokenKind::Identifier,
             };
 
@@ -223,6 +231,7 @@ impl<'src> Tokenizer<'src> {
             '&' => single!(TokenKind::Ampersand),
             '|' => single!(TokenKind::Pipe),
             '^' => single!(TokenKind::Caret),
+            '~' => single!(TokenKind::Tilde),
 
             '.' => single!(TokenKind::Dot),
             ',' => single!(TokenKind::Comma),
@@ -230,42 +239,42 @@ impl<'src> Tokenizer<'src> {
             ';' => single!(TokenKind::Semicolon),
 
             '=' => {
+                self.consume();
+
                 if self.peek() == Some('=') {
-                    self.consume();
                     self.consume();
                     TokenKind::Equal
                 } else {
-                    self.consume();
                     TokenKind::Assign
                 }
             }
             '!' => {
+                self.consume();
+
                 if self.peek() == Some('=') {
-                    self.consume();
                     self.consume();
                     TokenKind::Unequal
                 } else {
-                    self.consume();
-                    TokenKind::Bang
+                    TokenKind::InvalidCharacter
                 }
             }
             '<' => {
+                self.consume();
+
                 if self.peek() == Some('=') {
-                    self.consume();
                     self.consume();
                     TokenKind::LessThanOrEqual
                 } else {
-                    self.consume();
                     TokenKind::LessThan
                 }
             }
             '>' => {
+                self.consume();
+
                 if self.peek() == Some('=') {
-                    self.consume();
                     self.consume();
                     TokenKind::GreaterThanOrEqual
                 } else {
-                    self.consume();
                     TokenKind::GreaterThan
                 }
             }