diff --git a/src/ast.rs b/src/ast.rs
index 80aa883..7e73509 100644
--- a/src/ast.rs
+++ b/src/ast.rs
@@ -5,9 +5,10 @@
 //! stages. Currently only the [`Parsed`] phase exists, which attaches no
 //! extra data (`()`) to each node.
 //!
-//! The two primary node families are:
+//! The primary node families are:
 //! - [`Expression`] / [`ExpressionKind`] — value-producing constructs.
 //! - [`Type`] / [`TypeKind`] — type annotations.
+//! - [`Statement`] / [`StatementKind`] — top-level and block-level statements.
 
 use std::fmt::Debug;
 use crate::token::Span;
@@ -198,3 +199,44 @@ pub enum TypeKind {
     /// A user-defined named type, e.g. `MyStruct`.
     Named(String),
 }
+
+/// Convenience alias for a [`Statement`] in the [`Parsed`] phase.
+pub type ParsedStatement = Statement<Parsed>;
+
+/// A statement node in the AST.
+///
+/// Statements are the sequential building blocks of a block body. Like
+/// [`Expression`] and [`Type`], a statement is parameterised over a [`Phase`]
+/// so that later compiler passes can attach additional information without
+/// changing the node layout.
+#[derive(Debug, PartialEq)]
+pub struct Statement<P: Phase> {
+    pub kind: StatementKind<P>,
+    pub span: Span,
+    pub extra: P::ExtraData,
+}
+
+/// The concrete variant of a [`Statement`].
+#[derive(Debug, PartialEq)]
+pub enum StatementKind<P: Phase> {
+    /// A `let` binding, e.g. `let x: i32 = 0;`.
+    ///
+    /// Both the type annotation and the initialiser are optional at the parse
+    /// stage and may be filled in or validated by later passes.
+    Let {
+        /// The name of the binding.
+        name: String,
+        /// Source span of the name token, used for diagnostics.
+        name_span: Span,
+        /// Optional explicit type annotation (`let x: T`).
+        ty: Option<Type<P>>,
+        /// Optional initialiser expression (`= <expr>`).
+        value: Option<Expression<P>>,
+    },
+
+    /// A bare expression statement, e.g. `f(x);`.
+    ///
+    /// The trailing `;` is not stored in the node but is included in
+    /// [`Statement::span`].
+    Expr(Expression<P>),
+}
diff --git a/src/lexer.rs b/src/lexer.rs
index 3bbed7b..a6807f8 100644
--- a/src/lexer.rs
+++ b/src/lexer.rs
@@ -116,6 +116,7 @@ impl<'src> Lexer<'src> {
                     "and" => TokenKind::KwAnd,
                     "or" => TokenKind::KwOr,
                     "as" => TokenKind::KwAs,
+                    "let" => TokenKind::KwLet,
 
                     "u8" => TokenKind::TyU8,
                     "u16" => TokenKind::TyU16,
diff --git a/src/main.rs b/src/main.rs
index 89d2dd3..45e62f1 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -30,7 +30,7 @@ fn main() {
         println!("-- {} --", file.display());
 
         let mut parser = Parser::new(&content);
-        match parser.parse_expression(0) {
+        match parser.parse_statement() {
             Ok(ast) => println!("{ast:#?}"),
             Err(diag) => diag.report(file, &content),
         }
diff --git a/src/parser.rs b/src/parser.rs
index 92e14ee..35c0d16 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -1,6 +1,7 @@
 //! Recursive-descent / Pratt parser that converts a token stream into an AST.
 //!
-//! The entry points are [`Parser::parse_type`] and [`Parser::parse_expression`].
+//! The entry points are [`Parser::parse_statement`], [`Parser::parse_type`],
+//! and [`Parser::parse_expression`].
 //! Errors are represented as [`Diagnostic`] values; the caller is responsible
 //! for reporting them.
 use std::iter::Peekable;
@@ -104,6 +105,72 @@ impl<'src> Parser<'src> {
         }
     }
 
+    /// Parses the next statement.
+    ///
+    /// Dispatches to the appropriate specialised parser based on the leading
+    /// token:
+    /// - `let` → [`parse_let_statement`](Self::parse_let_statement)
+    /// - anything else → an expression followed by a mandatory `;`
+    pub fn parse_statement(&mut self) -> Result<ast::ParsedStatement, Diagnostic> {
+        let peek = self.peek_no_eof()?;
+
+        match peek.kind {
+            TokenKind::KwLet => self.parse_let_statement(),
+
+            _ => {
+                let expr = self.parse_expression(0)?;
+                let semi_token = self.expect(TokenKind::Semi)?;
+                let span = expr.span.extend(semi_token.span);
+
+                Ok(ast::ParsedStatement {
+                    kind: ast::StatementKind::Expr(expr),
+                    span,
+                    extra: (),
+                })
+            }
+        }
+    }
+
+    /// Parses a `let` binding statement: `let <name>[: <type>] [= <expr>];`.
+    ///
+    /// Both the type annotation and the initialiser are optional. The
+    /// statement span runs from the `let` keyword through to the closing `;`.
+    fn parse_let_statement(&mut self) -> Result<ast::ParsedStatement, Diagnostic> {
+        let let_token = self.expect(TokenKind::KwLet)?;
+
+        let (name, name_span) = {
+            let ident_token = self.expect(TokenKind::Identifier)?;
+            (ident_token.text.to_string(), ident_token.span)
+        };
+
+        let ty = if self.is_peek(TokenKind::Colon) {
+            self.advance();
+            Some(self.parse_type()?)
+        } else {
+            None
+        };
+
+        let value = if self.is_peek(TokenKind::Assign) {
+            self.advance();
+            Some(self.parse_expression(0)?)
+        } else {
+            None
+        };
+
+        let semi_token = self.expect(TokenKind::Semi)?;
+
+        Ok(ast::ParsedStatement {
+            kind: ast::StatementKind::Let {
+                name,
+                name_span,
+                ty,
+                value,
+            },
+            span: let_token.span.extend(semi_token.span),
+            extra: (),
+        })
+    }
+
     /// Parses a type annotation, e.g. `u8`, `i64`, `bool`, or a user-defined
     /// named type.
     ///
diff --git a/src/token.rs b/src/token.rs
index 5aa046e..a8850a1 100644
--- a/src/token.rs
+++ b/src/token.rs
@@ -93,6 +93,7 @@ define_tokens! {
     KwAnd => "`and`",
     KwOr => "`or`",
     KwAs => "`as`",
+    KwLet => "`let`",
 
     // -- Type Keywords --
     TyU8 => "`u8`",
diff --git a/test.bky b/test.bky
index e10bb17..d75e696 100644
--- a/test.bky
+++ b/test.bky
@@ -1 +1 @@
-foo.bar - 5 as i32
\ No newline at end of file
+let test: i32 = foo.bar - 5 as i32;
\ No newline at end of file