From 4e2df32e3607cd4540c476e9e8facecd6688af96 Mon Sep 17 00:00:00 2001 From: Jooris Hadeler Date: Thu, 12 Mar 2026 12:50:17 +0100 Subject: [PATCH] feat: Add support for type parsing. This commit adds parsing for simple primitive types (plus plain named types). It also adds a new kind of expression, the cast expression (`expr as type`). --- src/ast.rs | 31 ++++++++++++++++++ src/lexer.rs | 6 ++++ src/parser.rs | 87 +++++++++++++++++++++++++++++++++++++++++++++++++-- src/token.rs | 18 ++++++----- test.bky | 2 +- 5 files changed, 132 insertions(+), 12 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index b87d181..7449fe5 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -60,6 +60,11 @@ pub enum ExpressionKind { expr: Box>, index: Box>, }, + + Cast { + expr: Box>, + ty: Box>, + }, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -123,3 +128,29 @@ pub enum BinaryOp { /// Member Access Dot, } + +pub type ParsedType = Type; + +#[derive(Debug, PartialEq)] +pub struct Type { + pub kind: TypeKind, + pub span: Span, + pub extra: P::ExtraData, +} + +#[derive(Debug, PartialEq)] +pub enum TypeKind { + I8, + I16, + I32, + I64, + + U8, + U16, + U32, + U64, + + Bool, + + Named(String), +} diff --git a/src/lexer.rs b/src/lexer.rs index 4254b77..c5a448e 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -71,14 +71,20 @@ impl<'src> Lexer<'src> { match &self.source[start..self.position] { "and" => TokenKind::KwAnd, "or" => TokenKind::KwOr, + "as" => TokenKind::KwAs, + "u8" => TokenKind::TyU8, "u16" => TokenKind::TyU16, "u32" => TokenKind::TyU32, "u64" => TokenKind::TyU64, + "i8" => TokenKind::TyI8, "i16" => TokenKind::TyI16, "i32" => TokenKind::TyI32, "i64" => TokenKind::TyI64, + + "bool" => TokenKind::TyBool, + "true" | "false" => TokenKind::LitBool, _ => TokenKind::Identifier, } diff --git a/src/parser.rs b/src/parser.rs index 66dc7dd..a789e59 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -63,6 +63,55 @@ impl<'src> Parser<'src> { } } + /// Skips [Token]s until we reach a neutral statement boundary, so that + /// subsequent 
statements can still be parsed cleanly. + fn synchronize(&mut self) { + while let Some(peek) = self.peek() { + match peek.kind { + // Consume the `;` and stop + TokenKind::Semi => { + self.advance(); + break; + } + + // Stop before these + TokenKind::RCurly => break, + + _ => _ = self.advance(), + } + } + } + + pub fn parse_type(&mut self) -> Result { + let peek = self.peek_no_eof()?; + + let kind = match peek.kind { + TokenKind::TyU8 => ast::TypeKind::U8, + TokenKind::TyU16 => ast::TypeKind::U16, + TokenKind::TyU32 => ast::TypeKind::U32, + TokenKind::TyU64 => ast::TypeKind::U64, + + TokenKind::TyI8 => ast::TypeKind::I8, + TokenKind::TyI16 => ast::TypeKind::I16, + TokenKind::TyI32 => ast::TypeKind::I32, + TokenKind::TyI64 => ast::TypeKind::I64, + + TokenKind::TyBool => ast::TypeKind::Bool, + + TokenKind::Identifier => ast::TypeKind::Named(peek.text.to_string()), + + _ => return Err(Diagnostic::new(Severity::Error, "expected a type")), + }; + + let span = self.advance().span; + + Ok(ast::ParsedType { + kind, + span, + extra: (), + }) + } + /// Parses an [ast::Expression] using the pratt parsing algorithm. pub fn parse_expression(&mut self, min_bp: u8) -> Result { let peek_token = self.peek_no_eof()?; @@ -95,6 +144,7 @@ impl<'src> Parser<'src> { left = match peek_token.kind { TokenKind::LParen => self.parse_call_expr(left)?, TokenKind::LBracket => self.parse_index_expr(left)?, + TokenKind::KwAs => self.parse_cast_expr(left)?, _ => unreachable!(), }; @@ -177,7 +227,17 @@ impl<'src> Parser<'src> { extra: (), }) } + TokenKind::LParen => { + let open_paren = self.advance(); + let inner = self.parse_expression(0)?; + let close_paren = self.expect(TokenKind::RParen)?; + Ok(ast::ParsedExpression { + kind: inner.kind, + span: open_paren.span.extend(close_paren.span), + extra: (), + }) + } _ => Err(Diagnostic::new( Severity::Error, format!( @@ -242,6 +302,26 @@ impl<'src> Parser<'src> { extra: (), }) } + + /// Parses an [ast::ExpressionKind::Cast] expression. 
+ fn parse_cast_expr( + &mut self, + expr: ast::ParsedExpression, + ) -> Result { + self.expect(TokenKind::KwAs)?; + + let ty = self.parse_type()?; + let span = expr.span.extend(ty.span); + + Ok(ast::ParsedExpression { + kind: ast::ExpressionKind::Cast { + expr: Box::new(expr), + ty: Box::new(ty), + }, + span, + extra: (), + }) + } } fn infix_binding_power(kind: TokenKind) -> Option<(u8, u8, ast::BinaryOp)> { @@ -273,7 +353,7 @@ fn infix_binding_power(kind: TokenKind) -> Option<(u8, u8, ast::BinaryOp)> { TokenKind::Slash => (70, 71, ast::BinaryOp::Div), TokenKind::Percent => (70, 71, ast::BinaryOp::Rem), - TokenKind::Dot => (90, 91, ast::BinaryOp::Dot), + TokenKind::Dot => (100, 101, ast::BinaryOp::Dot), _ => return None, }) @@ -293,8 +373,9 @@ fn prefix_binding_power(kind: TokenKind) -> Option<(u8, ast::UnaryOp)> { fn postfix_binding_power(kind: TokenKind) -> Option { Some(match kind { - TokenKind::LParen => 90, - TokenKind::LBracket => 90, + TokenKind::LParen => 100, + TokenKind::LBracket => 100, + TokenKind::KwAs => 90, _ => return None, }) diff --git a/src/token.rs b/src/token.rs index f63a347..cffb4d7 100644 --- a/src/token.rs +++ b/src/token.rs @@ -68,16 +68,18 @@ define_tokens! { // -- Keywords -- KwAnd => "`and`", KwOr => "`or`", + KwAs => "`as`", // -- Type Keywords -- - TyU8 => "`u8`", - TyU16 => "`u16`", - TyU32 => "`u32`", - TyU64 => "`u64`", - TyI8 => "`i8`", - TyI16 => "`i16`", - TyI32 => "`i32`", - TyI64 => "`i64`", + TyU8 => "`u8`", + TyU16 => "`u16`", + TyU32 => "`u32`", + TyU64 => "`u64`", + TyI8 => "`i8`", + TyI16 => "`i16`", + TyI32 => "`i32`", + TyI64 => "`i64`", + TyBool => "`bool`", // -- Arithmetic Operators -- Plus => "`+`", diff --git a/test.bky b/test.bky index 534e282..e10bb17 100644 --- a/test.bky +++ b/test.bky @@ -1 +1 @@ -foo.bar(12, 3) - 5 +foo.bar - 5 as i32 \ No newline at end of file