Compare commits

..

2 Commits

Author SHA1 Message Date
1107c7d93d feat: Add support for let and expression statements.
This commit implements parsing for `let` statements, anything that
is not a let statement will be assumed to be an expression statement.
2026-03-12 21:23:09 +01:00
bb9cb8d2d1 docs: Add and improve documentation for every module. 2026-03-12 20:44:41 +01:00
8 changed files with 497 additions and 75 deletions

View File

@@ -1,15 +1,34 @@
//! Abstract Syntax Tree (AST) definitions.
//!
//! The AST is parameterised over a [`Phase`] type-state so that the same node
//! types can carry different amounts of information at different compiler
//! stages. Currently only the [`Parsed`] phase exists, which attaches no
//! extra data (`()`) to each node.
//!
//! The primary node families are:
//! - [`Expression`] / [`ExpressionKind`] — value-producing constructs.
//! - [`Type`] / [`TypeKind`] — type annotations.
//! - [`Statement`] / [`StatementKind`] — top-level and block-level statements.
use std::fmt::Debug;
use crate::token::Span;
/// Marker trait that carries phase-specific associated data for AST nodes.
///
/// Each phase defines an [`ExtraData`](Phase::ExtraData) type that is embedded
/// in every node. This allows later compiler passes (e.g. type-checking) to
/// augment the tree without duplicating the node hierarchy.
///
/// Current phases:
/// - [`Parsed`] — produced directly by the parser; no extra data.
pub trait Phase {
    /// Per-node payload for this phase. The `PartialEq + Debug` bounds let
    /// the node types keep their `#[derive(Debug, PartialEq)]` impls.
    type ExtraData: PartialEq + Debug;
}
/// The initial AST phase produced by the parser.
///
/// In this phase [`Phase::ExtraData`] is `()`, meaning nodes carry only
/// syntactic information (kind + source span).
#[derive(Debug)]
pub struct Parsed;
@@ -17,10 +36,15 @@ impl Phase for Parsed {
type ExtraData = ();
}
/// Convenience alias for an [`Expression`] in the [`Parsed`] phase.
pub type ParsedExpression = Expression<Parsed>;
/// This represents an expression in the source code. It holds the
/// [ExpressionKind], the [Span] and extra information according to the [Phase].
/// A value-producing node in the AST.
///
/// Every expression carries:
/// - [`kind`](Expression::kind) — what *kind* of expression it is.
/// - [`span`](Expression::span) — the source location it was parsed from.
/// - [`extra`](Expression::extra) — phase-specific data (see [`Phase`]).
#[derive(Debug, PartialEq)]
pub struct Expression<P: Phase> {
pub kind: ExpressionKind<P>,
@@ -28,109 +52,125 @@ pub struct Expression<P: Phase> {
pub extra: P::ExtraData,
}
/// The concrete variant of an [`Expression`], e.g. a literal, unary or
/// binary expression.
#[derive(Debug, PartialEq)]
pub enum ExpressionKind<P: Phase> {
    /// A bare name, e.g. `foo`.
    Identifier(String),
    /// A string literal, e.g. `"hello"`.
    LitString(String),
    /// An integer literal, e.g. `42`, `0xFF`, `0b1010`. The value is stored
    /// as a `u64` regardless of the source radix.
    LitInteger(u64),
    /// A boolean literal: `true` or `false`.
    LitBool(bool),
    /// A prefix unary expression, e.g. `-x`, `!cond`, `*ptr`.
    Unary {
        /// The operator applied to `operand`.
        op: UnaryOp,
        /// Source span of the operator token itself.
        op_span: Span,
        /// The expression the operator is applied to.
        operand: Box<Expression<P>>,
    },
    /// An infix binary expression, e.g. `a + b`, `x == y`.
    Binary {
        /// The operator between `left` and `right`.
        op: BinaryOp,
        /// Source span of the operator token itself.
        op_span: Span,
        /// Left-hand operand.
        left: Box<Expression<P>>,
        /// Right-hand operand.
        right: Box<Expression<P>>,
    },
    /// A function call, e.g. `f(a, b)`.
    Call {
        /// The callee expression (often an [`Identifier`](ExpressionKind::Identifier)).
        func: Box<Expression<P>>,
        /// Argument expressions in source order.
        args: Vec<Expression<P>>,
    },
    /// An index expression, e.g. `arr[i]`.
    Index {
        /// The collection being indexed.
        expr: Box<Expression<P>>,
        /// The index expression inside the brackets.
        index: Box<Expression<P>>,
    },
    /// A type-cast expression, e.g. `x as u32`.
    Cast {
        /// The value being converted.
        expr: Box<Expression<P>>,
        /// The target type to the right of `as`.
        ty: Box<Type<P>>,
    },
}
/// A prefix unary operator.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UnaryOp {
    /// Bitwise complement (`~`)
    BitNot,
    /// Logical negation (`!`)
    Not,
    /// Arithmetic negation (`-`)
    Neg,
    /// Address-of (`&`)
    AddrOf,
    /// Pointer dereference (`*`)
    Deref,
}
/// An infix binary operator.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BinaryOp {
    /// Addition (`+`)
    Add,
    /// Subtraction (`-`)
    Sub,
    /// Multiplication (`*`)
    Mul,
    /// Division (`/`)
    Div,
    /// Remainder (`%`)
    Rem,
    /// Bitwise AND (`&`)
    BitAnd,
    /// Bitwise OR (`|`)
    BitOr,
    /// Bitwise XOR (`^`)
    BitXor,
    /// Left shift (`<<`)
    BitShl,
    /// Right shift (`>>`)
    BitShr,
    /// Logical AND (`and`)
    And,
    /// Logical OR (`or`)
    Or,
    /// Equality (`==`)
    Eq,
    /// Inequality (`!=`)
    Ne,
    /// Less-than (`<`)
    Lt,
    /// Less-than-or-equal (`<=`)
    Le,
    /// Greater-than (`>`)
    Gt,
    /// Greater-than-or-equal (`>=`)
    Ge,
    /// Assignment (`=`)
    Assign,
    /// Member access (`.`)
    Dot,
}
/// Convenience alias for a [`Type`] in the [`Parsed`] phase.
pub type ParsedType = Type<Parsed>;
/// A type annotation node in the AST.
#[derive(Debug, PartialEq)]
pub struct Type<P: Phase> {
pub kind: TypeKind,
@@ -138,19 +178,65 @@ pub struct Type<P: Phase> {
pub extra: P::ExtraData,
}
/// The concrete variant of a [`Type`] annotation.
#[derive(Debug, PartialEq)]
pub enum TypeKind {
    /// Signed 8-bit integer (`i8`).
    I8,
    /// Signed 16-bit integer (`i16`).
    I16,
    /// Signed 32-bit integer (`i32`).
    I32,
    /// Signed 64-bit integer (`i64`).
    I64,
    /// Unsigned 8-bit integer (`u8`).
    U8,
    /// Unsigned 16-bit integer (`u16`).
    U16,
    /// Unsigned 32-bit integer (`u32`).
    U32,
    /// Unsigned 64-bit integer (`u64`).
    U64,
    /// Boolean type (`bool`).
    Bool,
    /// A user-defined named type, e.g. `MyStruct`.
    Named(String),
}
/// Convenience alias for a [`Statement`] in the [`Parsed`] phase.
pub type ParsedStatement = Statement<Parsed>;
/// A statement node in the AST.
///
/// Statements are the sequential building blocks of a block body. Like
/// [`Expression`] and [`Type`], a statement is parameterised over a [`Phase`]
/// so that later compiler passes can attach additional information without
/// changing the node layout.
#[derive(Debug, PartialEq)]
pub struct Statement<P: Phase> {
    /// Which kind of statement this is (e.g. `let` binding, expression).
    pub kind: StatementKind<P>,
    /// Source region covered by the whole statement, including the trailing `;`.
    pub span: Span,
    /// Phase-specific payload (see [`Phase`]); `()` in the [`Parsed`] phase.
    pub extra: P::ExtraData,
}
/// The concrete variant of a [`Statement`].
#[derive(Debug, PartialEq)]
pub enum StatementKind<P: Phase> {
    /// A `let` binding, e.g. `let x: i32 = 0;`.
    ///
    /// Both the type annotation and the initialiser are optional at the parse
    /// stage and may be filled in or validated by later passes. The `let`
    /// keyword and the closing `;` are not stored here but are covered by
    /// [`Statement::span`].
    Let {
        /// The name of the binding.
        name: String,
        /// Source span of the name token, used for diagnostics.
        name_span: Span,
        /// Optional explicit type annotation (`let x: T`).
        ty: Option<Type<P>>,
        /// Optional initialiser expression (`= <expr>`).
        value: Option<Expression<P>>,
    },
    /// A bare expression statement, e.g. `f(x);`.
    ///
    /// The trailing `;` is not stored in the node but is included in
    /// [`Statement::span`].
    Expr(Expression<P>),
}

View File

@@ -1,7 +1,18 @@
//! Command-line interface: argument parsing, help/version output, and fatal
//! error reporting.
//!
//! The primary entry point is [`parse_args`], which parses [`std::env::args`]
//! and returns an [`Opts`] struct. If any argument is invalid or required
//! arguments are missing, it calls [`fatal`] which prints an error to `stderr`
//! and exits with code `1`.
use std::path::PathBuf;
use yansi::Paint;
/// Print the help message to `stdout`.
///
/// Describes the compiler's usage, all supported options, and the `<file>`
/// positional argument.
pub fn print_help() {
println!(
"{} {} - the bucky language compiler",
@@ -47,27 +58,52 @@ pub fn print_help() {
);
}
/// Print the compiler version string (`buckyc <version>`) to `stdout`.
pub fn print_version() {
    let version = env!("CARGO_PKG_VERSION");
    println!("buckyc {version}");
}
/// Print a formatted error message to `stderr` and exit with code `1`.
///
/// This function never returns (`-> !`). Use it for unrecoverable CLI errors
/// such as missing arguments or unknown flags, discovered before compilation
/// begins.
pub fn fatal(message: impl ToString) -> ! {
eprintln!("{}: {}", "error".bold().red(), message.to_string().bold());
std::process::exit(1);
}
/// Parsed command-line options returned by [`parse_args`].
#[derive(Debug)]
pub struct Opts {
    /// One or more source files to compile, in the order they were supplied.
    pub files: Vec<PathBuf>,
    /// `-S`: emit IR and stop (implies [`no_link`](Opts::no_link)).
    pub emit_ir: bool,
    /// `-c`: compile to an object file without invoking the linker.
    pub no_link: bool,
    /// `-o <file>`: destination path for the final output. When `None` the
    /// compiler chooses a default output name.
    pub output: Option<PathBuf>,
}
/// Parse [`std::env::args`] and return the resulting [`Opts`].
///
/// Recognised flags:
///
/// | Flag | Effect |
/// |------|--------|
/// | `-h`, `--help` | Print help and exit `0` |
/// | `-V`, `--version` | Print version and exit `0` |
/// | `-S` | Set [`emit_ir`](Opts::emit_ir) and [`no_link`](Opts::no_link) |
/// | `-c` | Set [`no_link`](Opts::no_link) |
/// | `-o <file>` | Set [`output`](Opts::output) |
/// | `<file>` | Append to [`files`](Opts::files) |
///
/// Calls [`fatal`] (and exits) if:
/// - an unknown `-`-prefixed flag is encountered, or
/// - `-o` is supplied without a following argument, or
/// - no source files are provided.
pub fn parse_args() -> Opts {
let mut files = Vec::new();
let mut no_link = false;

View File

@@ -1,14 +1,37 @@
//! Compiler diagnostic reporting with source-location context.
//!
//! This module provides [`Diagnostic`], a structured error/warning message that
//! can optionally include a source span and one or more labelled secondary
//! spans. Diagnostics are rendered to `stderr` in a rustc-inspired format:
//!
//! ```text
//! Error: undeclared variable `x`
//! --> src/main.bky:3:5
//! |
//! 3 | let y = x + 1;
//! | ^ undeclared variable
//! |
//! ```
use std::{fmt::Display, path::Path, process::exit};
use yansi::Paint;
use crate::token::Span;
/// The importance level of a [`Diagnostic`].
///
/// Variants are ordered from least to most severe so that `<` / `>` comparisons
/// work intuitively (e.g. `Severity::Warning < Severity::Error`).
// NOTE: declaration order drives the derived `Ord`:
// Note < Warning < Error < Critical. Do not reorder variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Severity {
    /// Purely informational; never causes the compiler to stop.
    Note,
    /// Something suspicious that may or may not be a problem.
    Warning,
    /// A recoverable problem that prevents successful compilation.
    Error,
    /// An unrecoverable problem; the process will exit immediately after
    /// reporting this diagnostic.
    Critical,
}
@@ -23,14 +46,29 @@ impl Display for Severity {
}
}
/// A single compiler message with optional source-location information.
///
/// Build a diagnostic with [`Diagnostic::new`], optionally attach a primary
/// source location via [`with_span`](Diagnostic::with_span), attach labelled
/// secondary locations via [`add_label`](Diagnostic::add_label), then call
/// [`report`](Diagnostic::report) to print it.
///
/// If the severity is [`Severity::Critical`], `report` will call
/// [`process::exit`](std::process::exit) after printing.
pub struct Diagnostic {
    /// How important this message is; [`Severity::Critical`] causes
    /// [`report`](Diagnostic::report) to terminate the process.
    pub severity: Severity,
    /// Primary source location, if any.
    pub span: Option<Span>,
    /// The main human-readable message, printed on the first output line.
    pub message: String,
    /// Secondary labelled spans rendered below the primary snippet.
    pub labels: Vec<(Span, String)>,
}
impl Diagnostic {
/// Create a new diagnostic with the given severity and message.
///
/// No source location is attached; use [`with_span`](Self::with_span) to
/// add one.
pub fn new(severity: Severity, message: impl ToString) -> Self {
Self {
severity,
@@ -40,16 +78,29 @@ impl Diagnostic {
}
}
/// Attach a primary source span to this diagnostic.
pub fn with_span(mut self, span: Span) -> Self {
self.span = Some(span);
self
}
/// Attach a labelled secondary span, returning the updated builder.
///
/// A label whose span exactly matches the primary span is merged into the
/// primary underline as inline text; every other label is rendered as its own
/// snippet below the primary one.
pub fn add_label(mut self, span: Span, message: impl ToString) -> Self {
    let entry = (span, message.to_string());
    self.labels.push(entry);
    self
}
/// Print this diagnostic to `stderr` and, if the severity is
/// [`Severity::Critical`], terminate the process.
///
/// # Arguments
/// * `file_name` path shown in the `-->` location line.
/// * `source` full source text of the file, used to extract line/col
/// information and to display the relevant source snippet.
pub fn report(self, file_name: &Path, source: &str) {
eprintln!("{}: {}", self.severity, self.message.bold());
@@ -165,6 +216,7 @@ fn render_snippet(
eprintln!("{pad} {bar} {spaces}{colored_carets}{label_text}");
}
/// Apply severity-appropriate ANSI colour to a string.
fn paint_severity(s: &str, severity: Severity) -> String {
match severity {
Severity::Note => format!("{}", s.bold().bright_cyan()),
@@ -173,6 +225,7 @@ fn paint_severity(s: &str, severity: Severity) -> String {
}
}
/// Returns the number of decimal digits in `n` (minimum 1).
///
/// Uses integer log10 instead of formatting `n` into a temporary `String`,
/// avoiding an allocation per call (this runs for every rendered snippet).
fn count_digits(n: usize) -> usize {
    // `checked_ilog10` is `None` for 0, which still occupies one digit.
    n.checked_ilog10().map_or(1, |log| log as usize + 1)
}
@@ -187,6 +240,10 @@ fn get_line_content(source: &str, position: u32) -> (usize, &str) {
(line_start, &rest[..line_len])
}
/// Returns the 1-based `(line, column)` for a byte `position` within `source`.
///
/// Both line and column are counted from 1. The column is measured in Unicode
/// scalar values (characters), not bytes.
fn get_line_col(source: &str, position: u32) -> (usize, usize) {
let prefix = &source[..position as usize];
let line = prefix.bytes().filter(|&b| b == b'\n').count() + 1;

View File

@@ -1,16 +1,44 @@
//! Lexer (tokeniser) that converts raw source text into a [`Token`] stream.
//!
//! [`Lexer`] implements [`Iterator<Item = Token>`] so it can be used directly
//! in a `for` loop or with iterator adaptors such as `.peekable()`.
//! Whitespace and `#`-line-comments are skipped automatically between tokens.
//!
//! # Character classes
//! - **Identifiers / keywords** — start with a
//! [XID_Start](https://unicode.org/reports/tr31/) character or `_`, continue
//! with XID_Continue characters. Reserved words are mapped to their
//! respective [`TokenKind`] variants; everything else becomes
//! [`TokenKind::Identifier`].
//! - **Integer literals** — decimal by default; `0x` / `0o` / `0b` prefixes
//! select hexadecimal, octal, and binary respectively.
//! - **String literals** — delimited by `"…"`; `\` escapes the next character.
//! - **Operators and punctuation** — single- or double-character tokens
//! dispatched via the `token!` macro with one character of lookahead.
use std::{iter::Peekable, str::Chars};
use unicode_xid::UnicodeXID;
use crate::token::{Span, Token, TokenKind};
/// A lazy iterator over the [`Token`]s of a source string.
///
/// Tokens borrow their text slice directly from the original source, so the
/// lexer lifetime `'src` must outlive any use of the produced tokens.
///
/// Construct with [`Lexer::new`] and consume via the [`Iterator`] impl or by
/// passing it to the parser.
pub struct Lexer<'src> {
    /// One-character look-ahead over the source characters.
    chars: Peekable<Chars<'src>>,
    /// The full source text, kept for slice extraction in [`make`](Self::make).
    source: &'src str,
    /// Current *byte* offset into `source` (not a character count); advanced
    /// by [`advance`](Self::advance) by each character's UTF-8 length.
    position: usize,
}
impl<'src> Lexer<'src> {
/// Creates a new [`Lexer`] positioned at the start of `source`.
pub fn new(source: &'src str) -> Self {
Self {
chars: source.chars().peekable(),
@@ -24,22 +52,29 @@ impl<'src> Lexer<'src> {
self.chars.peek().copied()
}
/// Consume and return the next character, advancing [`position`](Self::position)
/// by the character's UTF-8 byte length.
///
/// # Panics
/// Panics if called at the end of input. Always guard with
/// [`peek`](Self::peek) first.
fn advance(&mut self) -> char {
    let ch = self.chars.next().expect("failed to advance the lexer");
    self.position += ch.len_utf8();
    ch
}
/// Advance while `condition` holds, stopping at the first character for
/// which it returns `false` (or at end of input).
fn advance_while(&mut self, condition: impl FnMut(char) -> bool + Copy) {
    while self.peek().is_some_and(condition) {
        self.advance();
    }
}
/// Build a token from `[start, self.pos)`.
/// Construct a [`Token`] spanning the byte range `[start, self.position)`.
///
/// The token's `text` is a zero-copy slice of the source string.
fn make(&self, kind: TokenKind, start: usize) -> Token<'src> {
Token {
kind,
@@ -48,7 +83,11 @@ impl<'src> Lexer<'src> {
}
}
/// Skip all whitespace and comments.
/// Skip any run of whitespace followed by a `#` line comment, repeating
/// until neither is present.
///
/// Comments begin with `#` and extend to (but do not include) the
/// following `\n`.
fn skip_whitespace_and_comments(&mut self) {
loop {
self.advance_while(char::is_whitespace);
@@ -61,7 +100,12 @@ impl<'src> Lexer<'src> {
}
}
/// Lexes the next identifier token.
/// Lex the next identifier or keyword token.
///
/// Assumes the current peek character satisfies `is_xid_start() || == '_'`.
/// Consumes one XID_Start (or `_`) character followed by any number of
/// XID_Continue characters, then matches the resulting slice against the
/// keyword / type-keyword table.
fn next_identifier(&mut self) -> TokenKind {
let start = self.position;
@@ -72,6 +116,7 @@ impl<'src> Lexer<'src> {
"and" => TokenKind::KwAnd,
"or" => TokenKind::KwOr,
"as" => TokenKind::KwAs,
"let" => TokenKind::KwLet,
"u8" => TokenKind::TyU8,
"u16" => TokenKind::TyU16,
@@ -90,7 +135,12 @@ impl<'src> Lexer<'src> {
}
}
/// Lexes the next number token.
/// Lex the next integer literal token.
///
/// Assumes the current peek character is an ASCII digit. Detects an
/// optional radix prefix (`0x` → 16, `0o` → 8, `0b` → 2) then consumes
/// all subsequent digits valid for that radix. Always returns
/// [`TokenKind::LitInt`].
fn next_number(&mut self) -> TokenKind {
let radix = match self.advance() {
'0' => match self.peek() {
@@ -116,7 +166,15 @@ impl<'src> Lexer<'src> {
TokenKind::LitInt
}
/// Lexes the next string token.
/// Lex the next string literal token.
///
/// Assumes the current peek character is `"`. Consumes characters until
/// a closing (unescaped) `"` is found or input is exhausted. A `\`
/// escapes the immediately following character, preventing it from being
/// treated as a closing delimiter. Always returns [`TokenKind::LitString`].
///
/// Note: escape sequences are not validated here; that is left to a later
/// compiler stage.
fn next_string(&mut self) -> TokenKind {
let mut escaped = false;
@@ -144,11 +202,25 @@ impl<'src> Lexer<'src> {
impl<'src> Iterator for Lexer<'src> {
type Item = Token<'src>;
/// Returns the next [`Token`], or `None` when the source is exhausted.
///
/// Leading whitespace and `#`-comments are skipped before each token.
/// Multi-character operator tokens (`->`, `<<`, `<=`, …) are resolved with
/// a single character of lookahead via the `token!` macro. Unrecognised
/// characters are returned as [`TokenKind::Unknown`].
fn next(&mut self) -> Option<Self::Item> {
self.skip_whitespace_and_comments();
let start = self.position;
/// Builds and evaluates a [`TokenKind`] from the current position.
///
/// Three forms:
/// - `token!($kind)` — single-character token: advance once, yield `$kind`.
/// - `token!($c => $kind, … ; $default)` — multi-character token with
/// lookahead: advance once (consuming the lead character), then
/// check the next character against each `$c => $kind` arm in order,
/// falling back to `$default` if none match.
macro_rules! token {
// Case 1: Simple token (no lookahead)
($default:expr) => {{

View File

@@ -30,7 +30,7 @@ fn main() {
println!("-- {} --", file.display());
let mut parser = Parser::new(&content);
match parser.parse_expression(0) {
match parser.parse_statement() {
Ok(ast) => println!("{ast:#?}"),
Err(diag) => diag.report(file, &content),
}

View File

@@ -1,3 +1,9 @@
//! Recursive-descent / Pratt parser that converts a token stream into an AST.
//!
//! The entry points are [`Parser::parse_statement`], [`Parser::parse_type`],
//! and [`Parser::parse_expression`].
//! Errors are represented as [`Diagnostic`] values; the caller is responsible
//! for reporting them.
use std::iter::Peekable;
use crate::ast;
@@ -5,15 +11,24 @@ use crate::diagnostic::{Diagnostic, Severity};
use crate::lexer::Lexer;
use crate::token::{Token, TokenKind};
/// Consumes the [`Token`] stream produced by the [`Lexer`] and constructs an
/// AST in the [`ast::Parsed`] phase.
///
/// The parser uses a single token of look-ahead (peek) for all decisions.
/// Expression parsing is implemented with the
/// [Pratt / top-down operator-precedence](https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html)
/// algorithm; binding-power tables are defined by [`infix_binding_power`],
/// [`prefix_binding_power`], and [`postfix_binding_power`].
pub struct Parser<'src> {
    /// Lazily-lexed token stream with one token of look-ahead.
    tokens: Peekable<Lexer<'src>>,
    /// Diagnostics accumulated during parsing. Non-fatal errors are pushed here
    /// so that the parser can attempt to continue and surface multiple issues
    /// in a single pass.
    errors: Vec<Diagnostic>,
}
impl<'src> Parser<'src> {
/// Constructs a new [Parser] with the given source text.
/// Constructs a new [`Parser`] with the given source text.
pub fn new(source: &'src str) -> Self {
Self {
tokens: Lexer::new(source).peekable(),
@@ -21,35 +36,40 @@ impl<'src> Parser<'src> {
}
}
/// Peek at the next [`Token`] without consuming it.
fn peek(&mut self) -> Option<Token<'src>> {
    self.tokens.peek().copied()
}
/// Peek at the next [`Token`], returning an [`Err`] diagnostic if the
/// token stream is exhausted.
fn peek_no_eof(&mut self) -> Result<Token<'src>, Diagnostic> {
    self.peek()
        .ok_or_else(|| Diagnostic::new(Severity::Error, "unexpected end of input"))
}
/// Returns `true` if the next token has the given [`TokenKind`].
fn is_peek(&mut self, kind: TokenKind) -> bool {
    // `is_some_and` matches the style already used by the lexer and avoids
    // the `map_or(false, ..)` pattern flagged by clippy.
    self.peek().is_some_and(|tok| tok.is(kind))
}
/// Returns `true` if the token stream is exhausted.
fn is_at_eof(&mut self) -> bool {
    self.peek().is_none()
}
/// Consumes and returns the next [`Token`].
///
/// # Panics
/// Panics if called at the end of input. Always check [`is_at_eof`](Self::is_at_eof)
/// or use [`peek_no_eof`](Self::peek_no_eof) / [`expect`](Self::expect) in
/// production code paths.
fn advance(&mut self) -> Token<'src> {
    self.tokens.next().expect("failed to advance the parser")
}
/// Consumes and returns the next [Token], if it is of a given [TokenKind],
/// otherwise returns an [Err].
/// Consumes and returns the next [`Token`] if it matches `kind`; otherwise
/// returns an [`Err`] diagnostic that points at the offending token.
fn expect(&mut self, kind: TokenKind) -> Result<Token<'src>, Diagnostic> {
match self.peek() {
Some(tok) if tok.is(kind) => Ok(self.advance()),
@@ -63,8 +83,11 @@ impl<'src> Parser<'src> {
}
}
/// Skips [Token]s until we reach a neutral statement boundary, so that
/// subsequent statements can still be parsed cleanly.
/// Error-recovery helper: skips tokens until a statement boundary is
/// reached so that subsequent statements can still be parsed cleanly.
///
/// Stops *after* consuming a `;`, or *before* consuming a `}`. This keeps
/// nested blocks intact when recovering inside function bodies.
fn synchronize(&mut self) {
while let Some(peek) = self.peek() {
match peek.kind {
@@ -82,6 +105,76 @@ impl<'src> Parser<'src> {
}
}
/// Parses the next statement.
///
/// Dispatches on the leading token: `let` is handed to
/// [`parse_let_statement`](Self::parse_let_statement); anything else is
/// parsed as an expression followed by a mandatory `;`.
pub fn parse_statement(&mut self) -> Result<ast::ParsedStatement, Diagnostic> {
    if self.peek_no_eof()?.is(TokenKind::KwLet) {
        return self.parse_let_statement();
    }

    // Expression statement: `<expr> ;`
    let expr = self.parse_expression(0)?;
    let semi = self.expect(TokenKind::Semi)?;
    Ok(ast::ParsedStatement {
        // The statement span runs from the expression through the `;`.
        span: expr.span.extend(semi.span),
        kind: ast::StatementKind::Expr(expr),
        extra: (),
    })
}
/// Parses a `let` binding statement: `let <name>[: <type>] [= <expr>];`.
///
/// Both the type annotation and the initialiser are optional. The statement
/// span runs from the `let` keyword through to the closing `;`.
fn parse_let_statement(&mut self) -> Result<ast::ParsedStatement, Diagnostic> {
    let let_token = self.expect(TokenKind::KwLet)?;

    let ident = self.expect(TokenKind::Identifier)?;
    let name = ident.text.to_string();
    let name_span = ident.span;

    // Optional `: <type>` annotation.
    let mut ty = None;
    if self.is_peek(TokenKind::Colon) {
        self.advance();
        ty = Some(self.parse_type()?);
    }

    // Optional `= <expr>` initialiser.
    let mut value = None;
    if self.is_peek(TokenKind::Assign) {
        self.advance();
        value = Some(self.parse_expression(0)?);
    }

    let semi = self.expect(TokenKind::Semi)?;
    Ok(ast::ParsedStatement {
        kind: ast::StatementKind::Let {
            name,
            name_span,
            ty,
            value,
        },
        span: let_token.span.extend(semi.span),
        extra: (),
    })
}
/// Parses a type annotation, e.g. `u8`, `i64`, `bool`, or a user-defined
/// named type.
///
/// Returns an [`Err`] diagnostic if the next token is not a valid type.
pub fn parse_type(&mut self) -> Result<ast::ParsedType, Diagnostic> {
let peek = self.peek_no_eof()?;
@@ -112,7 +205,24 @@ impl<'src> Parser<'src> {
})
}
/// Parses an [ast::Expression] using the pratt parsing algorithm.
/// Parses an expression using the Pratt (top-down operator-precedence)
/// algorithm.
///
/// `min_bp` is the minimum *left* binding power the next infix/postfix
/// operator must have to be incorporated into the current expression. Pass
/// `0` to parse a full expression with no restrictions.
///
/// The precedence hierarchy (low → high) is:
/// - assignment (`=`)
/// - logical `or` / `and`
/// - bitwise `|` / `^` / `&`
/// - equality (`==`, `!=`) and comparison (`<`, `<=`, `>`, `>=`)
/// - addition / subtraction
/// - shifts (`<<`, `>>`)
/// - multiplication / division / remainder
/// - member access (`.`)
/// - postfix: call `()`, index `[]`, cast `as`
/// - prefix: `-`, `&`, `~`, `*`, `!`
pub fn parse_expression(&mut self, min_bp: u8) -> Result<ast::ParsedExpression, Diagnostic> {
let peek_token = self.peek_no_eof()?;
@@ -182,7 +292,11 @@ impl<'src> Parser<'src> {
Ok(left)
}
/// Parses a primary expression, e.g. literals, unary or grouped expression.
/// Parses a primary (non-operator) expression: an identifier, integer
/// literal, boolean literal, or a parenthesised expression.
///
/// Integer literals support `0x` (hex), `0o` (octal), and `0b` (binary)
/// prefixes in addition to plain decimal.
fn parse_primary_expression(&mut self) -> Result<ast::ParsedExpression, Diagnostic> {
let peek_token = self.peek_no_eof()?;
@@ -252,7 +366,10 @@ impl<'src> Parser<'src> {
}
}
/// Parses a [ast::ExpressionKind::Call] expression.
/// Parses a function-call expression `func(arg, …)`.
///
/// The opening `(` is consumed here; `func` is the already-parsed callee
/// expression passed in from the Pratt loop.
fn parse_call_expr(
&mut self,
func: ast::ParsedExpression,
@@ -281,7 +398,10 @@ impl<'src> Parser<'src> {
})
}
/// Parses an [ast::ExpressionKind::Index] expression.
/// Parses an index expression `expr[index]`.
///
/// The opening `[` is consumed here; `expr` is the already-parsed
/// collection expression passed in from the Pratt loop.
fn parse_index_expr(
&mut self,
expr: ast::ParsedExpression,
@@ -303,7 +423,10 @@ impl<'src> Parser<'src> {
})
}
/// Parses an [ast::ExpressionKind::Cast] expression.
/// Parses a cast expression `expr as Type`.
///
/// The `as` keyword is consumed here; `expr` is the already-parsed value
/// expression passed in from the Pratt loop.
fn parse_cast_expr(
&mut self,
expr: ast::ParsedExpression,
@@ -324,6 +447,12 @@ impl<'src> Parser<'src> {
}
}
/// Returns `(left_bp, right_bp, op)` for infix operators, or `None` if `kind`
/// is not an infix operator.
///
/// The two binding-power values implement associativity: `right_bp = left_bp`
/// makes an operator right-associative (currently used for `=`), while a
/// `right_bp` greater than `left_bp` makes it left-associative.
fn infix_binding_power(kind: TokenKind) -> Option<(u8, u8, ast::BinaryOp)> {
Some(match kind {
TokenKind::Assign => (2, 2, ast::BinaryOp::Assign),
@@ -359,6 +488,11 @@ fn infix_binding_power(kind: TokenKind) -> Option<(u8, u8, ast::BinaryOp)> {
})
}
/// Returns `(right_bp, op)` for prefix operators, or `None` if `kind` is not
/// a prefix operator.
///
/// All prefix operators currently share the same binding power (`80`), giving
/// them higher precedence than any binary operator.
fn prefix_binding_power(kind: TokenKind) -> Option<(u8, ast::UnaryOp)> {
Some(match kind {
TokenKind::Minus => (80, ast::UnaryOp::Neg),
@@ -371,6 +505,12 @@ fn prefix_binding_power(kind: TokenKind) -> Option<(u8, ast::UnaryOp)> {
})
}
/// Returns the *left* binding power for postfix operators, or `None` if `kind`
/// is not a postfix operator.
///
/// Postfix operators (`()`, `[]`, `as`) bind tighter than all binary operators
/// but are checked before prefix operators in the Pratt loop so they always
/// apply to the nearest sub-expression.
fn postfix_binding_power(kind: TokenKind) -> Option<u8> {
Some(match kind {
TokenKind::LParen => 100,

View File

@@ -1,28 +1,50 @@
//! Token definitions used by the [`Lexer`](crate::lexer::Lexer) and
//! [`Parser`](crate::parser::Parser).
//!
//! The two core types are:
//! - [`Span`] — a half-open byte range that marks a location in source text.
//! - [`Token`] — a classified slice of source text together with its span.
//!
//! [`TokenKind`] enumerates every token variant; its [`Display`](std::fmt::Display)
//! impl produces the human-readable representation used in diagnostics.
use std::fmt;
/// A half-open byte range `[start, end)` that marks a location in the source
/// string.
///
/// Positions are stored as [`u32`], which limits supported source files to
/// 4 GiB — more than sufficient for any practical source file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Span {
    /// Inclusive start byte offset.
    pub start: u32,
    /// Exclusive end byte offset.
    pub end: u32,
}
impl Span {
/// Creates a new span covering `[start, end)`.
///
/// A `const fn`, so spans can also be built in constant contexts.
pub const fn new(start: u32, end: u32) -> Self {
    Self { start, end }
}
/// Returns the length of the span in bytes.
///
/// An inverted span (`end < start`) yields `0` rather than underflowing.
pub fn len(&self) -> u32 {
    if self.end > self.start {
        self.end - self.start
    } else {
        0
    }
}
/// Returns `true` if the span covers zero bytes.
///
/// Defined in terms of [`len`](Self::len) so that an inverted span — which
/// `len` saturates to `0` — is also reported as empty, keeping the two
/// methods consistent (previously `start == end` disagreed with `len() == 0`
/// for inverted spans).
pub fn is_empty(&self) -> bool {
    self.len() == 0
}
/// Extend this [Span] to cover `other` as well.
/// Returns the smallest span that covers both `self` and `other`.
///
/// This is the union of the two ranges, useful for computing the span of a
/// parent node from its children.
pub fn extend(self, other: Self) -> Self {
Self {
start: self.start.min(other.start),
@@ -37,8 +59,10 @@ impl fmt::Display for Span {
}
}
/// This macro helps with defining the different kinds of [Token]s. It
/// simultaneously defines a variant and its [fmt::Display] implementation.
/// Simultaneously defines the [`TokenKind`] enum and its [`fmt::Display`] impl.
///
/// Each arm maps a variant name to the human-readable string used in
/// diagnostics (e.g. `` `+` ``, `identifier`).
macro_rules! define_tokens {
($($name:ident => $repr:literal),* $(,)?) => {
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
@@ -69,6 +93,7 @@ define_tokens! {
KwAnd => "`and`",
KwOr => "`or`",
KwAs => "`as`",
KwLet => "`let`",
// -- Type Keywords --
TyU8 => "`u8`",
@@ -97,7 +122,7 @@ define_tokens! {
Shr => "`>>`",
Bang => "`!`",
// -- Comparision Operators --
// -- Comparison Operators --
Eq => "`==`",
Ne => "`!=`",
Lt => "`<`",
@@ -125,17 +150,23 @@ define_tokens! {
Unknown => "unknown character"
}
/// The smallest contiguous unit of source text, as produced by the
/// [`Lexer`](crate::lexer::Lexer).
///
/// A token borrows its [`text`](Token::text) slice directly from the original
/// source string, so the lifetime `'src` ties every token to that source.
#[derive(Debug, Clone, Copy)]
pub struct Token<'src> {
    /// The syntactic category of this token.
    pub kind: TokenKind,
    /// The byte range in the source string where this token appears.
    pub span: Span,
    /// The raw source text of this token (a zero-copy slice).
    pub text: &'src str,
}
impl<'src> Token<'src> {
/// Returns `true` if this token has the given [`TokenKind`].
pub fn is(&self, kind: TokenKind) -> bool {
    self.kind == kind
}

View File

@@ -1 +1 @@
foo.bar - 5 as i32
let test: i32 = foo.bar - 5 as i32;