Compare commits
7 Commits
1f6f876f58
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| cde0ff5582 | |||
| 1107c7d93d | |||
| bb9cb8d2d1 | |||
| 4e2df32e36 | |||
| 93f08d1944 | |||
| 9ac8a79151 | |||
| 51bd07d313 |
7
Cargo.lock
generated
7
Cargo.lock
generated
@@ -6,9 +6,16 @@ version = 4
|
||||
name = "buckyc"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"unicode-xid",
|
||||
"yansi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-xid"
|
||||
version = "0.2.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
|
||||
|
||||
[[package]]
|
||||
name = "yansi"
|
||||
version = "1.0.1"
|
||||
|
||||
@@ -4,4 +4,5 @@ version = "0.1.0"
|
||||
edition = "2024"
|
||||
|
||||
[dependencies]
|
||||
unicode-xid = "0.2.6"
|
||||
yansi = "1.0.1"
|
||||
|
||||
27
README.md
Normal file
27
README.md
Normal file
@@ -0,0 +1,27 @@
|
||||
<div align="center">
|
||||
<img src="logo.svg" alt="Bucky Logo" width="200" height="200">
|
||||
|
||||
# Bucky
|
||||
|
||||
**Bucky** is a compiled, statically typed programming language with a Rust-inspired syntax, designed for performance, safety, and expressiveness.
|
||||
</div>
|
||||
|
||||
## Features
|
||||
|
||||
- **Compiled**: Produces native binaries for high performance
|
||||
- **Statically Typed**: Strong type checking at compile time
|
||||
- **Rust-Inspired Syntax**: Familiar, expressive, and modern
|
||||
- **Explicit and Predictable**: Emphasizes clarity and correctness
|
||||
- **Safety-Oriented Design**: Encourages writing robust code by default
|
||||
|
||||
## Examples
|
||||
|
||||
```bucky
|
||||
fn fib(n: u64): u64 {
|
||||
if n < 2 {
|
||||
return n;
|
||||
}
|
||||
|
||||
return fib(n - 1) + fib(n - 2);
|
||||
}
|
||||
```
|
||||
7
examples/fibonacci.bky
Normal file
7
examples/fibonacci.bky
Normal file
@@ -0,0 +1,7 @@
|
||||
fn fib(n: u64) -> u64 {
|
||||
if n < 2 {
|
||||
return n;
|
||||
}
|
||||
|
||||
return fib(n - 1) + fib(n - 2);
|
||||
}
|
||||
5
examples/hello-world.bky
Normal file
5
examples/hello-world.bky
Normal file
@@ -0,0 +1,5 @@
|
||||
extern puts(text: *char);
|
||||
|
||||
fn main() {
|
||||
puts("Hello, World!");
|
||||
}
|
||||
33
logo.svg
Normal file
33
logo.svg
Normal file
@@ -0,0 +1,33 @@
|
||||
<svg width="512" height="512" viewBox="0 0 512 512" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||
<circle cx="256" cy="256" r="256" fill="#F7F5F0"/>
|
||||
|
||||
<defs>
|
||||
<linearGradient id="gradientWarm" x1="100" y1="400" x2="200" y2="100" gradientUnits="userSpaceOnUse">
|
||||
<stop offset="0%" stop-color="#D97042" /> <stop offset="100%" stop-color="#E89F57" /> </linearGradient>
|
||||
|
||||
<linearGradient id="gradientCool" x1="412" y1="400" x2="312" y2="100" gradientUnits="userSpaceOnUse">
|
||||
<stop offset="0%" stop-color="#2D4F56" /> <stop offset="100%" stop-color="#4AA096" /> </linearGradient>
|
||||
|
||||
<filter id="softShadow" x="-20%" y="-20%" width="140%" height="140%">
|
||||
<feDropShadow dx="0" dy="4" stdDeviation="6" flood-color="#000" flood-opacity="0.1"/>
|
||||
</filter>
|
||||
</defs>
|
||||
|
||||
<g filter="url(#softShadow)">
|
||||
|
||||
<path d="M 235 410 C 235 410, 160 360, 145 220 C 140 170, 155 130, 155 130"
|
||||
stroke="url(#gradientWarm)" stroke-width="32" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
<path d="M 178 340 C 195 340, 110 310, 82 250"
|
||||
stroke="url(#gradientWarm)" stroke-width="28" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
<path d="M 160 270 C 160 270, 200 230, 210 180"
|
||||
stroke="url(#gradientWarm)" stroke-width="26" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
|
||||
<path d="M 277 410 C 277 410, 352 360, 367 220 C 372 170, 357 130, 357 130"
|
||||
stroke="url(#gradientCool)" stroke-width="32" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
<path d="M 335 340 C 317 340, 402 310, 430 250"
|
||||
stroke="url(#gradientCool)" stroke-width="28" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
<path d="M 352 270 C 352 270, 312 230, 302 180"
|
||||
stroke="url(#gradientCool)" stroke-width="26" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
|
||||
</g>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 1.8 KiB |
251
src/ast.rs
Normal file
251
src/ast.rs
Normal file
@@ -0,0 +1,251 @@
|
||||
//! Abstract Syntax Tree (AST) definitions.
|
||||
//!
|
||||
//! The AST is parameterised over a [`Phase`] type-state so that the same node
|
||||
//! types can carry different amounts of information at different compiler
|
||||
//! stages. Currently only the [`Parsed`] phase exists, which attaches no
|
||||
//! extra data (`()`) to each node.
|
||||
//!
|
||||
//! The primary node families are:
|
||||
//! - [`Expression`] / [`ExpressionKind`] — value-producing constructs.
|
||||
//! - [`Type`] / [`TypeKind`] — type annotations.
|
||||
//! - [`Statement`] / [`StatementKind`] — top-level and block-level statements.
|
||||
use std::fmt::Debug;
|
||||
|
||||
use crate::token::Span;
|
||||
|
||||
/// Marker trait that carries phase-specific associated data for AST nodes.
|
||||
///
|
||||
/// Each phase defines an [`ExtraData`](Phase::ExtraData) type that is embedded
|
||||
/// in every node. This allows later compiler passes (e.g. type-checking) to
|
||||
/// augment the tree without duplicating the node hierarchy.
|
||||
///
|
||||
/// Current phases:
|
||||
/// - [`Parsed`] — produced directly by the parser; no extra data.
|
||||
pub trait Phase {
|
||||
type ExtraData: PartialEq + Debug;
|
||||
}
|
||||
|
||||
/// The initial AST phase produced by the parser.
|
||||
///
|
||||
/// In this phase [`Phase::ExtraData`] is `()`, meaning nodes carry only
|
||||
/// syntactic information (kind + source span).
|
||||
#[derive(Debug)]
|
||||
pub struct Parsed;
|
||||
|
||||
impl Phase for Parsed {
|
||||
type ExtraData = ();
|
||||
}
|
||||
|
||||
/// Convenience alias for an [`Expression`] in the [`Parsed`] phase.
|
||||
pub type ParsedExpression = Expression<Parsed>;
|
||||
|
||||
/// A value-producing node in the AST.
|
||||
///
|
||||
/// Every expression carries:
|
||||
/// - [`kind`](Expression::kind) — what *kind* of expression it is.
|
||||
/// - [`span`](Expression::span) — the source location it was parsed from.
|
||||
/// - [`extra`](Expression::extra) — phase-specific data (see [`Phase`]).
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct Expression<P: Phase> {
|
||||
pub kind: ExpressionKind<P>,
|
||||
pub span: Span,
|
||||
pub extra: P::ExtraData,
|
||||
}
|
||||
|
||||
/// The concrete variant of an [`Expression`].
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum ExpressionKind<P: Phase> {
|
||||
/// A bare name, e.g. `foo`.
|
||||
Identifier(String),
|
||||
|
||||
/// A string literal, e.g. `"hello"`.
|
||||
LitString(String),
|
||||
/// An integer literal, e.g. `42`, `0xFF`, `0b1010`. The value is stored
|
||||
/// as a `u64` regardless of the source radix.
|
||||
LitInteger(u64),
|
||||
/// A boolean literal: `true` or `false`.
|
||||
LitBool(bool),
|
||||
|
||||
/// A prefix unary expression, e.g. `-x`, `!cond`, `*ptr`.
|
||||
Unary {
|
||||
op: UnaryOp,
|
||||
/// Source span of the operator token itself.
|
||||
op_span: Span,
|
||||
operand: Box<Expression<P>>,
|
||||
},
|
||||
|
||||
/// An infix binary expression, e.g. `a + b`, `x == y`.
|
||||
Binary {
|
||||
op: BinaryOp,
|
||||
/// Source span of the operator token itself.
|
||||
op_span: Span,
|
||||
left: Box<Expression<P>>,
|
||||
right: Box<Expression<P>>,
|
||||
},
|
||||
|
||||
/// A function call, e.g. `f(a, b)`.
|
||||
Call {
|
||||
/// The callee expression (often an [`Identifier`](ExpressionKind::Identifier)).
|
||||
func: Box<Expression<P>>,
|
||||
args: Vec<Expression<P>>,
|
||||
},
|
||||
|
||||
/// An index expression, e.g. `arr[i]`.
|
||||
Index {
|
||||
expr: Box<Expression<P>>,
|
||||
index: Box<Expression<P>>,
|
||||
},
|
||||
|
||||
/// A type-cast expression, e.g. `x as u32`.
|
||||
Cast {
|
||||
expr: Box<Expression<P>>,
|
||||
ty: Box<Type<P>>,
|
||||
},
|
||||
}
|
||||
|
||||
/// A prefix unary operator.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum UnaryOp {
|
||||
/// Bitwise complement (`~`)
|
||||
BitNot,
|
||||
/// Logical negation (`!`)
|
||||
Not,
|
||||
/// Arithmetic negation (`-`)
|
||||
Neg,
|
||||
/// Address-of (`&`)
|
||||
AddrOf,
|
||||
/// Pointer dereference (`*`)
|
||||
Deref,
|
||||
}
|
||||
|
||||
/// An infix binary operator.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum BinaryOp {
|
||||
/// Addition (`+`)
|
||||
Add,
|
||||
/// Subtraction (`-`)
|
||||
Sub,
|
||||
/// Multiplication (`*`)
|
||||
Mul,
|
||||
/// Division (`/`)
|
||||
Div,
|
||||
/// Remainder (`%`)
|
||||
Rem,
|
||||
|
||||
/// Bitwise AND (`&`)
|
||||
BitAnd,
|
||||
/// Bitwise OR (`|`)
|
||||
BitOr,
|
||||
/// Bitwise XOR (`^`)
|
||||
BitXor,
|
||||
/// Left shift (`<<`)
|
||||
BitShl,
|
||||
/// Right shift (`>>`)
|
||||
BitShr,
|
||||
|
||||
/// Logical AND (`and`)
|
||||
And,
|
||||
/// Logical OR (`or`)
|
||||
Or,
|
||||
|
||||
/// Equality (`==`)
|
||||
Eq,
|
||||
/// Inequality (`!=`)
|
||||
Ne,
|
||||
/// Less-than (`<`)
|
||||
Lt,
|
||||
/// Less-than-or-equal (`<=`)
|
||||
Le,
|
||||
/// Greater-than (`>`)
|
||||
Gt,
|
||||
/// Greater-than-or-equal (`>=`)
|
||||
Ge,
|
||||
|
||||
/// Assignment (`=`)
|
||||
Assign,
|
||||
/// Member access (`.`)
|
||||
Dot,
|
||||
}
|
||||
|
||||
/// Convenience alias for a [`Type`] in the [`Parsed`] phase.
|
||||
pub type ParsedType = Type<Parsed>;
|
||||
|
||||
/// A type annotation node in the AST.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct Type<P: Phase> {
|
||||
pub kind: TypeKind,
|
||||
pub span: Span,
|
||||
pub extra: P::ExtraData,
|
||||
}
|
||||
|
||||
/// The concrete variant of a [`Type`] annotation.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum TypeKind {
|
||||
/// Signed integers
|
||||
I8,
|
||||
I16,
|
||||
I32,
|
||||
I64,
|
||||
|
||||
/// Unsigned integers
|
||||
U8,
|
||||
U16,
|
||||
U32,
|
||||
U64,
|
||||
|
||||
/// Boolean type (`bool`)
|
||||
Bool,
|
||||
|
||||
/// A user-defined named type, e.g. `MyStruct`.
|
||||
Named(String),
|
||||
}
|
||||
|
||||
/// Convenience alias for a [`Statement`] in the [`Parsed`] phase.
|
||||
pub type ParsedStatement = Statement<Parsed>;
|
||||
|
||||
/// A statement node in the AST.
|
||||
///
|
||||
/// Statements are the sequential building blocks of a block body. Like
|
||||
/// [`Expression`] and [`Type`], a statement is parameterised over a [`Phase`]
|
||||
/// so that later compiler passes can attach additional information without
|
||||
/// changing the node layout.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct Statement<P: Phase> {
|
||||
pub kind: StatementKind<P>,
|
||||
pub span: Span,
|
||||
pub extra: P::ExtraData,
|
||||
}
|
||||
|
||||
/// The concrete variant of a [`Statement`].
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum StatementKind<P: Phase> {
|
||||
/// A `let` binding, e.g. `let x: i32 = 0;`.
|
||||
///
|
||||
/// Both the type annotation and the initialiser are optional at the parse
|
||||
/// stage and may be filled in or validated by later passes.
|
||||
Let {
|
||||
/// The name of the binding.
|
||||
name: String,
|
||||
/// Source span of the name token, used for diagnostics.
|
||||
name_span: Span,
|
||||
/// Optional explicit type annotation (`let x: T`).
|
||||
ty: Option<Type<P>>,
|
||||
/// Optional initialiser expression (`= <expr>`).
|
||||
value: Option<Expression<P>>,
|
||||
},
|
||||
|
||||
/// A braced block of statements, e.g. `{ let x = 1; f(x); }`.
|
||||
///
|
||||
/// Compound statements introduce a new scope and can appear anywhere a
|
||||
/// statement is expected.
|
||||
Compound {
|
||||
/// The statements contained within the block, in source order.
|
||||
inner: Vec<Statement<P>>,
|
||||
},
|
||||
|
||||
/// A bare expression statement, e.g. `f(x);`.
|
||||
///
|
||||
/// The trailing `;` is not stored in the node but is included in
|
||||
/// [`Statement::span`].
|
||||
Expr(Expression<P>),
|
||||
}
|
||||
46
src/cli.rs
46
src/cli.rs
@@ -1,7 +1,18 @@
|
||||
//! Command-line interface: argument parsing, help/version output, and fatal
|
||||
//! error reporting.
|
||||
//!
|
||||
//! The primary entry point is [`parse_args`], which parses [`std::env::args`]
|
||||
//! and returns an [`Opts`] struct. If any argument is invalid or required
|
||||
//! arguments are missing, it calls [`fatal`] which prints an error to `stderr`
|
||||
//! and exits with code `1`.
|
||||
use std::path::PathBuf;
|
||||
|
||||
use yansi::Paint;
|
||||
|
||||
/// Print the help message to `stdout`.
|
||||
///
|
||||
/// Describes the compiler's usage, all supported options, and the `<file>`
|
||||
/// positional argument.
|
||||
pub fn print_help() {
|
||||
println!(
|
||||
"{} {} - the bucky language compiler",
|
||||
@@ -42,32 +53,57 @@ pub fn print_help() {
|
||||
println!();
|
||||
println!("{}", "ARGS:".bold().yellow());
|
||||
println!(
|
||||
" {} One or more Flux source files to compile",
|
||||
" {} One or more source files to compile",
|
||||
"<file>".bold(),
|
||||
);
|
||||
}
|
||||
|
||||
/// Print the compiler version string (`buckyc <version>`) to `stdout`.
|
||||
pub fn print_version() {
|
||||
println!("buckyc {}", env!("CARGO_PKG_VERSION"));
|
||||
}
|
||||
|
||||
/// Print a formatted error message to `stderr` and exit with code `1`.
|
||||
///
|
||||
/// This function never returns (`-> !`). Use it for unrecoverable CLI errors
|
||||
/// such as missing arguments or unknown flags, discovered before compilation
|
||||
/// begins.
|
||||
pub fn fatal(message: impl ToString) -> ! {
|
||||
eprintln!("{}: {}", "error".bold().red(), message.to_string().bold());
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
/// Parsed command-line options returned by [`parse_args`].
|
||||
#[derive(Debug)]
|
||||
pub struct Opts {
|
||||
/// The list of files passed to the compiler.
|
||||
/// One or more source files to compile, in the order they were supplied.
|
||||
pub files: Vec<PathBuf>,
|
||||
/// `-S`: emit IR and stop (implies `-c`).
|
||||
/// `-S`: emit IR and stop (implies [`no_link`](Opts::no_link)).
|
||||
pub emit_ir: bool,
|
||||
/// `-c`: compile source to object file without linking.
|
||||
/// `-c`: compile to an object file without invoking the linker.
|
||||
pub no_link: bool,
|
||||
/// `-o <file>`: write final output to this path.
|
||||
/// `-o <file>`: destination path for the final output. When `None` the
|
||||
/// compiler chooses a default output name.
|
||||
pub output: Option<PathBuf>,
|
||||
}
|
||||
|
||||
/// Parse [`std::env::args`] and return the resulting [`Opts`].
|
||||
///
|
||||
/// Recognised flags:
|
||||
///
|
||||
/// | Flag | Effect |
|
||||
/// |------|--------|
|
||||
/// | `-h`, `--help` | Print help and exit `0` |
|
||||
/// | `-V`, `--version` | Print version and exit `0` |
|
||||
/// | `-S` | Set [`emit_ir`](Opts::emit_ir) and [`no_link`](Opts::no_link) |
|
||||
/// | `-c` | Set [`no_link`](Opts::no_link) |
|
||||
/// | `-o <file>` | Set [`output`](Opts::output) |
|
||||
/// | `<file>` | Append to [`files`](Opts::files) |
|
||||
///
|
||||
/// Calls [`fatal`] (and exits) if:
|
||||
/// - an unknown `-`-prefixed flag is encountered, or
|
||||
/// - `-o` is supplied without a following argument, or
|
||||
/// - no source files are provided.
|
||||
pub fn parse_args() -> Opts {
|
||||
let mut files = Vec::new();
|
||||
let mut no_link = false;
|
||||
|
||||
253
src/diagnostic.rs
Normal file
253
src/diagnostic.rs
Normal file
@@ -0,0 +1,253 @@
|
||||
//! Compiler diagnostic reporting with source-location context.
|
||||
//!
|
||||
//! This module provides [`Diagnostic`], a structured error/warning message that
|
||||
//! can optionally include a source span and one or more labelled secondary
|
||||
//! spans. Diagnostics are rendered to `stderr` in a rustc-inspired format:
|
||||
//!
|
||||
//! ```text
|
||||
//! Error: undeclared variable `x`
|
||||
//! --> src/main.bky:3:5
|
||||
//! |
|
||||
//! 3 | let y = x + 1;
|
||||
//! | ^ undeclared variable
|
||||
//! |
|
||||
//! ```
|
||||
use std::{fmt::Display, path::Path, process::exit};
|
||||
|
||||
use yansi::Paint;
|
||||
|
||||
use crate::token::Span;
|
||||
|
||||
/// The importance level of a [`Diagnostic`].
|
||||
///
|
||||
/// Variants are ordered from least to most severe so that `<` / `>` comparisons
|
||||
/// work intuitively (e.g. `Severity::Warning < Severity::Error`).
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum Severity {
|
||||
/// Purely informational; never causes the compiler to stop.
|
||||
Note,
|
||||
/// Something suspicious that may or may not be a problem.
|
||||
Warning,
|
||||
/// A recoverable problem that prevents successful compilation.
|
||||
Error,
|
||||
/// An unrecoverable problem; the process will exit immediately after
|
||||
/// reporting this diagnostic.
|
||||
Critical,
|
||||
}
|
||||
|
||||
impl Display for Severity {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Severity::Note => write!(f, "{}", "Note".bold().cyan()),
|
||||
Severity::Warning => write!(f, "{}", "Warning".bold().yellow()),
|
||||
Severity::Error => write!(f, "{}", "Error".bold().red()),
|
||||
Severity::Critical => write!(f, "{}", "Critical".bold().magenta()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A single compiler message with optional source-location information.
|
||||
///
|
||||
/// Build a diagnostic with [`Diagnostic::new`], optionally attach a primary
|
||||
/// source location via [`with_span`](Diagnostic::with_span), attach labelled
|
||||
/// secondary locations via [`add_label`](Diagnostic::add_label), then call
|
||||
/// [`report`](Diagnostic::report) to print it.
|
||||
///
|
||||
/// If the severity is [`Severity::Critical`], `report` will call
|
||||
/// [`process::exit`](std::process::exit) after printing.
|
||||
pub struct Diagnostic {
|
||||
pub severity: Severity,
|
||||
/// Primary source location, if any.
|
||||
pub span: Option<Span>,
|
||||
pub message: String,
|
||||
/// Secondary labelled spans rendered below the primary snippet.
|
||||
pub labels: Vec<(Span, String)>,
|
||||
}
|
||||
|
||||
impl Diagnostic {
|
||||
/// Create a new diagnostic with the given severity and message.
|
||||
///
|
||||
/// No source location is attached; use [`with_span`](Self::with_span) to
|
||||
/// add one.
|
||||
pub fn new(severity: Severity, message: impl ToString) -> Self {
|
||||
Self {
|
||||
severity,
|
||||
span: None,
|
||||
message: message.to_string(),
|
||||
labels: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Attach a primary source span to this diagnostic.
|
||||
pub fn with_span(mut self, span: Span) -> Self {
|
||||
self.span = Some(span);
|
||||
self
|
||||
}
|
||||
|
||||
/// Attach a labelled secondary span.
|
||||
///
|
||||
/// Labels whose span matches the primary span exactly are merged into the
|
||||
/// primary underline as inline text. All other labels are rendered as
|
||||
/// separate snippets below the primary one.
|
||||
pub fn add_label(mut self, span: Span, message: impl ToString) -> Self {
|
||||
self.labels.push((span, message.to_string()));
|
||||
self
|
||||
}
|
||||
|
||||
/// Print this diagnostic to `stderr` and, if the severity is
|
||||
/// [`Severity::Critical`], terminate the process.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `file_name` – path shown in the `-->` location line.
|
||||
/// * `source` – full source text of the file, used to extract line/col
|
||||
/// information and to display the relevant source snippet.
|
||||
pub fn report(self, file_name: &Path, source: &str) {
|
||||
eprintln!("{}: {}", self.severity, self.message.bold());
|
||||
|
||||
let Some(primary_span) = self.span else {
|
||||
eprintln!(" {} {}", "-->".bright_black(), file_name.display());
|
||||
if self.severity == Severity::Critical {
|
||||
exit(-1);
|
||||
}
|
||||
return;
|
||||
};
|
||||
|
||||
// Guard: no source context available (e.g. critical error before any
|
||||
// file is read).
|
||||
if source.is_empty() || primary_span.start as usize >= source.len() {
|
||||
eprintln!(" {} {}", "-->".bright_black(), file_name.display());
|
||||
if self.severity == Severity::Critical {
|
||||
exit(-1);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
let (primary_line, primary_col) = get_line_col(source, primary_span.start);
|
||||
|
||||
// Partition labels: those on the *exact same span* as the primary are
|
||||
// merged into the primary underline as inline text. All others are
|
||||
// rendered as separate snippets below the primary.
|
||||
let (same_span, other_span): (Vec<_>, Vec<_>) = self
|
||||
.labels
|
||||
.into_iter()
|
||||
.partition(|(s, _)| *s == primary_span);
|
||||
|
||||
let primary_label: Option<String> = same_span.into_iter().next().map(|(_, m)| m);
|
||||
|
||||
// Gutter must be wide enough for the highest line number we'll print.
|
||||
let max_line = other_span
|
||||
.iter()
|
||||
.filter(|(s, _)| (s.start as usize) < source.len())
|
||||
.map(|(s, _)| get_line_col(source, s.start).0)
|
||||
.fold(primary_line, usize::max);
|
||||
let gutter_w = count_digits(max_line);
|
||||
let pad = " ".repeat(gutter_w);
|
||||
|
||||
// " --> file:line:col"
|
||||
eprintln!(
|
||||
"{} {}:{}:{}",
|
||||
format!("{pad} -->").bright_black(),
|
||||
file_name.display(),
|
||||
primary_line,
|
||||
primary_col,
|
||||
);
|
||||
eprintln!("{}", format!("{pad} |").bright_black());
|
||||
|
||||
// Primary snippet.
|
||||
render_snippet(
|
||||
source,
|
||||
primary_span,
|
||||
primary_label.as_deref(),
|
||||
gutter_w,
|
||||
self.severity,
|
||||
);
|
||||
|
||||
// Additional-context labels (different locations).
|
||||
for (span, msg) in &other_span {
|
||||
if (span.start as usize) < source.len() {
|
||||
render_snippet(source, *span, Some(msg.as_str()), gutter_w, Severity::Note);
|
||||
}
|
||||
}
|
||||
|
||||
eprintln!("{}", format!("{pad} |").bright_black());
|
||||
|
||||
if self.severity == Severity::Critical {
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Render a single source-line snippet: the numbered source line followed by
|
||||
/// a `^^^` underline. When `label` is `Some`, the text is appended after the
|
||||
/// carets on the same line.
|
||||
fn render_snippet(
|
||||
source: &str,
|
||||
span: Span,
|
||||
label: Option<&str>,
|
||||
gutter_w: usize,
|
||||
severity: Severity,
|
||||
) {
|
||||
let (line_num, _) = get_line_col(source, span.start);
|
||||
let (line_start, line_content) = get_line_content(source, span.start);
|
||||
|
||||
let pad = " ".repeat(gutter_w);
|
||||
let bar = format!("{}", "|".bright_black());
|
||||
let line_num_str = format!("{:>width$}", line_num, width = gutter_w);
|
||||
|
||||
// "N | source text"
|
||||
eprintln!("{} {bar} {line_content}", line_num_str.bright_black());
|
||||
|
||||
// Caret underline, clamped to the current line.
|
||||
let col_offset = span.start as usize - line_start;
|
||||
let line_end_byte = line_start + line_content.len();
|
||||
let underline_len = (span.end as usize)
|
||||
.min(line_end_byte)
|
||||
.saturating_sub(span.start as usize)
|
||||
.max(1);
|
||||
|
||||
let spaces = " ".repeat(col_offset);
|
||||
let carets = "^".repeat(underline_len);
|
||||
let colored_carets = paint_severity(&carets, severity);
|
||||
let label_text = label
|
||||
.map(|l| format!(" {}", paint_severity(l, severity)))
|
||||
.unwrap_or_default();
|
||||
|
||||
// " | ^^^label"
|
||||
eprintln!("{pad} {bar} {spaces}{colored_carets}{label_text}");
|
||||
}
|
||||
|
||||
/// Apply severity-appropriate ANSI colour to a string.
|
||||
fn paint_severity(s: &str, severity: Severity) -> String {
|
||||
match severity {
|
||||
Severity::Note => format!("{}", s.bold().bright_cyan()),
|
||||
Severity::Warning => format!("{}", s.bold().bright_yellow()),
|
||||
Severity::Error | Severity::Critical => format!("{}", s.bold().bright_red()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the number of decimal digits in `n` (minimum 1).
|
||||
fn count_digits(n: usize) -> usize {
|
||||
format!("{n}").len()
|
||||
}
|
||||
|
||||
/// Returns `(line_start_byte, line_content)` for the line that contains
|
||||
/// `position`. The returned content does *not* include the trailing newline.
|
||||
fn get_line_content(source: &str, position: u32) -> (usize, &str) {
|
||||
let pos = position as usize;
|
||||
let line_start = source[..pos].rfind('\n').map(|i| i + 1).unwrap_or(0);
|
||||
let rest = &source[line_start..];
|
||||
let line_len = rest.find('\n').unwrap_or(rest.len());
|
||||
(line_start, &rest[..line_len])
|
||||
}
|
||||
|
||||
/// Returns the 1-based `(line, column)` for a byte `position` within `source`.
|
||||
///
|
||||
/// Both line and column are counted from 1. The column is measured in Unicode
|
||||
/// scalar values (characters), not bytes.
|
||||
fn get_line_col(source: &str, position: u32) -> (usize, usize) {
|
||||
let prefix = &source[..position as usize];
|
||||
let line = prefix.bytes().filter(|&b| b == b'\n').count() + 1;
|
||||
let line_start_byte = prefix.rfind('\n').map(|i| i + 1).unwrap_or(0);
|
||||
let col = prefix[line_start_byte..].chars().count() + 1;
|
||||
(line, col)
|
||||
}
|
||||
309
src/lexer.rs
Normal file
309
src/lexer.rs
Normal file
@@ -0,0 +1,309 @@
|
||||
//! Lexer (tokeniser) that converts raw source text into a [`Token`] stream.
|
||||
//!
|
||||
//! [`Lexer`] implements [`Iterator<Item = Token>`] so it can be used directly
|
||||
//! in a `for` loop or with iterator adaptors such as `.peekable()`.
|
||||
//! Whitespace and `#`-line-comments are skipped automatically between tokens.
|
||||
//!
|
||||
//! # Character classes
|
||||
//! - **Identifiers / keywords** — start with a
|
||||
//! [XID_Start](https://unicode.org/reports/tr31/) character or `_`, continue
|
||||
//! with XID_Continue characters. Reserved words are mapped to their
|
||||
//! respective [`TokenKind`] variants; everything else becomes
|
||||
//! [`TokenKind::Identifier`].
|
||||
//! - **Integer literals** — decimal by default; `0x` / `0o` / `0b` prefixes
|
||||
//! select hexadecimal, octal, and binary respectively.
|
||||
//! - **String literals** — delimited by `"…"`; `\` escapes the next character.
|
||||
//! - **Operators and punctuation** — single- or double-character tokens
|
||||
//! dispatched via the `token!` macro with one character of lookahead.
|
||||
use std::{iter::Peekable, str::Chars};
|
||||
|
||||
use unicode_xid::UnicodeXID;
|
||||
|
||||
use crate::token::{Span, Token, TokenKind};
|
||||
|
||||
/// A lazy iterator over the [`Token`]s of a source string.
|
||||
///
|
||||
/// Tokens borrow their text slice directly from the original source, so the
|
||||
/// lexer lifetime `'src` must outlive any use of the produced tokens.
|
||||
///
|
||||
/// Construct with [`Lexer::new`] and consume via the [`Iterator`] impl or by
|
||||
/// passing it to the parser.
|
||||
pub struct Lexer<'src> {
|
||||
/// One-character look-ahead over the source characters.
|
||||
chars: Peekable<Chars<'src>>,
|
||||
/// The full source text, kept for slice extraction in [`make`](Self::make).
|
||||
source: &'src str,
|
||||
/// Current byte offset into `source`. Advanced by [`advance`](Self::advance).
|
||||
position: usize,
|
||||
}
|
||||
|
||||
impl<'src> Lexer<'src> {
|
||||
/// Creates a new [`Lexer`] positioned at the start of `source`.
|
||||
pub fn new(source: &'src str) -> Self {
|
||||
Self {
|
||||
chars: source.chars().peekable(),
|
||||
source,
|
||||
position: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Peek at the next character without consuming it.
|
||||
fn peek(&mut self) -> Option<char> {
|
||||
self.chars.peek().copied()
|
||||
}
|
||||
|
||||
/// Consume and return the next character, advancing [`position`](Self::position)
|
||||
/// by the character's UTF-8 byte length.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if called at the end of input. Always guard with
|
||||
/// [`peek`](Self::peek) first.
|
||||
fn advance(&mut self) -> char {
|
||||
let ch = self.chars.next().expect("failed to advance the lexer");
|
||||
self.position += ch.len_utf8();
|
||||
ch
|
||||
}
|
||||
|
||||
/// Advance while `condition` holds, stopping at the first character for
|
||||
/// which it returns `false` (or at end of input).
|
||||
fn advance_while(&mut self, condition: impl FnMut(char) -> bool + Copy) {
|
||||
while self.peek().is_some_and(condition) {
|
||||
self.advance();
|
||||
}
|
||||
}
|
||||
|
||||
/// Construct a [`Token`] spanning the byte range `[start, self.position)`.
|
||||
///
|
||||
/// The token's `text` is a zero-copy slice of the source string.
|
||||
fn make(&self, kind: TokenKind, start: usize) -> Token<'src> {
|
||||
Token {
|
||||
kind,
|
||||
span: Span::new(start as u32, self.position as u32),
|
||||
text: &self.source[start..self.position],
|
||||
}
|
||||
}
|
||||
|
||||
/// Skip any run of whitespace followed by a `#` line comment, repeating
|
||||
/// until neither is present.
|
||||
///
|
||||
/// Comments begin with `#` and extend to (but do not include) the
|
||||
/// following `\n`.
|
||||
fn skip_whitespace_and_comments(&mut self) {
|
||||
loop {
|
||||
self.advance_while(char::is_whitespace);
|
||||
|
||||
if self.peek() == Some('#') {
|
||||
self.advance_while(|ch| ch != '\n');
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Lex the next identifier or keyword token.
|
||||
///
|
||||
/// Assumes the current peek character satisfies `is_xid_start() || == '_'`.
|
||||
/// Consumes one XID_Start (or `_`) character followed by any number of
|
||||
/// XID_Continue characters, then matches the resulting slice against the
|
||||
/// keyword / type-keyword table.
|
||||
fn next_identifier(&mut self) -> TokenKind {
|
||||
let start = self.position;
|
||||
|
||||
self.advance();
|
||||
self.advance_while(|ch| ch.is_xid_continue());
|
||||
|
||||
match &self.source[start..self.position] {
|
||||
"and" => TokenKind::KwAnd,
|
||||
"or" => TokenKind::KwOr,
|
||||
"as" => TokenKind::KwAs,
|
||||
"let" => TokenKind::KwLet,
|
||||
|
||||
"u8" => TokenKind::TyU8,
|
||||
"u16" => TokenKind::TyU16,
|
||||
"u32" => TokenKind::TyU32,
|
||||
"u64" => TokenKind::TyU64,
|
||||
|
||||
"i8" => TokenKind::TyI8,
|
||||
"i16" => TokenKind::TyI16,
|
||||
"i32" => TokenKind::TyI32,
|
||||
"i64" => TokenKind::TyI64,
|
||||
|
||||
"bool" => TokenKind::TyBool,
|
||||
|
||||
"true" | "false" => TokenKind::LitBool,
|
||||
_ => TokenKind::Identifier,
|
||||
}
|
||||
}
|
||||
|
||||
/// Lex the next integer literal token.
|
||||
///
|
||||
/// Assumes the current peek character is an ASCII digit. Detects an
|
||||
/// optional radix prefix (`0x` → 16, `0o` → 8, `0b` → 2) then consumes
|
||||
/// all subsequent digits valid for that radix. Always returns
|
||||
/// [`TokenKind::LitInt`].
|
||||
fn next_number(&mut self) -> TokenKind {
|
||||
let radix = match self.advance() {
|
||||
'0' => match self.peek() {
|
||||
Some('x') => {
|
||||
self.advance();
|
||||
16
|
||||
}
|
||||
Some('o') => {
|
||||
self.advance();
|
||||
8
|
||||
}
|
||||
Some('b') => {
|
||||
self.advance();
|
||||
2
|
||||
}
|
||||
_ => 10,
|
||||
},
|
||||
_ => 10,
|
||||
};
|
||||
|
||||
self.advance_while(|ch| ch.is_digit(radix));
|
||||
|
||||
TokenKind::LitInt
|
||||
}
|
||||
|
||||
/// Lex the next string literal token.
|
||||
///
|
||||
/// Assumes the current peek character is `"`. Consumes characters until
|
||||
/// a closing (unescaped) `"` is found or input is exhausted. A `\`
|
||||
/// escapes the immediately following character, preventing it from being
|
||||
/// treated as a closing delimiter. Always returns [`TokenKind::LitString`].
|
||||
///
|
||||
/// Note: escape sequences are not validated here; that is left to a later
|
||||
/// compiler stage.
|
||||
fn next_string(&mut self) -> TokenKind {
|
||||
let mut escaped = false;
|
||||
|
||||
self.advance();
|
||||
|
||||
while let Some(ch) = self.peek() {
|
||||
if escaped {
|
||||
self.advance();
|
||||
escaped = false;
|
||||
} else if ch == '\\' {
|
||||
self.advance();
|
||||
escaped = true;
|
||||
} else if ch == '"' {
|
||||
self.advance();
|
||||
break;
|
||||
} else {
|
||||
self.advance();
|
||||
}
|
||||
}
|
||||
|
||||
TokenKind::LitString
|
||||
}
|
||||
}
|
||||
|
||||
impl<'src> Iterator for Lexer<'src> {
|
||||
type Item = Token<'src>;
|
||||
|
||||
/// Returns the next [`Token`], or `None` when the source is exhausted.
|
||||
///
|
||||
/// Leading whitespace and `#`-comments are skipped before each token.
|
||||
/// Multi-character operator tokens (`->`, `<<`, `<=`, …) are resolved with
|
||||
/// a single character of lookahead via the `token!` macro. Unrecognised
|
||||
/// characters are returned as [`TokenKind::Unknown`].
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.skip_whitespace_and_comments();
|
||||
|
||||
let start = self.position;
|
||||
|
||||
/// Builds and evaluates a [`TokenKind`] from the current position.
|
||||
///
|
||||
/// Three forms:
|
||||
/// - `token!($kind)` — single-character token: advance once, yield `$kind`.
|
||||
/// - `token!($c => $kind, … ; $default)` — multi-character token with
|
||||
/// lookahead: advance once (consuming the lead character), then
|
||||
/// check the next character against each `$c => $kind` arm in order,
|
||||
/// falling back to `$default` if none match.
|
||||
macro_rules! token {
|
||||
// Case 1: Simple token (no lookahead)
|
||||
($default:expr) => {{
|
||||
self.advance();
|
||||
$default
|
||||
}};
|
||||
|
||||
// Case 2: Multi-character lookahead entry point
|
||||
($($c:expr => $kind:expr),+ ; $default:expr) => {{
|
||||
self.advance();
|
||||
token!(@step $($c => $kind),+ ; $default)
|
||||
}};
|
||||
|
||||
// Internal Recursive step: More than one pair remains
|
||||
(@step $c:expr => $kind:expr, $($rest_c:expr => $rest_k:expr),+ ; $default:expr) => {
|
||||
if self.peek() == Some($c) {
|
||||
self.advance();
|
||||
$kind
|
||||
} else {
|
||||
token!(@step $($rest_c => $rest_k),+ ; $default)
|
||||
}
|
||||
};
|
||||
|
||||
// Internal Base case: Last pair in the lookahead chain
|
||||
(@step $c:expr => $kind:expr ; $default:expr) => {
|
||||
if self.peek() == Some($c) {
|
||||
self.advance();
|
||||
$kind
|
||||
} else {
|
||||
$default
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
let kind = match self.peek()? {
|
||||
ch if ch.is_xid_start() || ch == '_' => self.next_identifier(),
|
||||
'0'..='9' => self.next_number(),
|
||||
'"' => self.next_string(),
|
||||
|
||||
'+' => token!(TokenKind::Plus),
|
||||
'-' => token!(
|
||||
'>' => TokenKind::Arrow;
|
||||
TokenKind::Minus
|
||||
),
|
||||
'*' => token!(TokenKind::Star),
|
||||
'/' => token!(TokenKind::Slash),
|
||||
'%' => token!(TokenKind::Percent),
|
||||
'&' => token!(TokenKind::Amp),
|
||||
'|' => token!(TokenKind::Pipe),
|
||||
'^' => token!(TokenKind::Caret),
|
||||
'~' => token!(TokenKind::Tilde),
|
||||
'<' => token!(
|
||||
'<' => TokenKind::Shl,
|
||||
'=' => TokenKind::Le;
|
||||
TokenKind::Lt
|
||||
),
|
||||
'>' => token!(
|
||||
'>' => TokenKind::Shr,
|
||||
'=' => TokenKind::Ge;
|
||||
TokenKind::Gt
|
||||
),
|
||||
'!' => token!(
|
||||
'=' => TokenKind::Ne;
|
||||
TokenKind::Bang
|
||||
),
|
||||
'=' => token!(
|
||||
'=' => TokenKind::Eq;
|
||||
TokenKind::Assign
|
||||
),
|
||||
'.' => token!(TokenKind::Dot),
|
||||
',' => token!(TokenKind::Comma),
|
||||
':' => token!(TokenKind::Colon),
|
||||
';' => token!(TokenKind::Semi),
|
||||
'(' => token!(TokenKind::LParen),
|
||||
')' => token!(TokenKind::RParen),
|
||||
'[' => token!(TokenKind::LBracket),
|
||||
']' => token!(TokenKind::RBracket),
|
||||
'{' => token!(TokenKind::LCurly),
|
||||
'}' => token!(TokenKind::RCurly),
|
||||
|
||||
_ => token!(TokenKind::Unknown),
|
||||
};
|
||||
|
||||
Some(self.make(kind, start))
|
||||
}
|
||||
}
|
||||
38
src/main.rs
38
src/main.rs
@@ -1,9 +1,43 @@
|
||||
use crate::cli::parse_args;
|
||||
use std::fs;
|
||||
|
||||
use crate::{
|
||||
cli::{fatal, parse_args},
|
||||
parser::Parser,
|
||||
};
|
||||
|
||||
mod ast;
|
||||
mod cli;
|
||||
mod diagnostic;
|
||||
mod lexer;
|
||||
mod parser;
|
||||
mod token;
|
||||
|
||||
/// Entry point: parse CLI options, then read, parse, and report each file.
fn main() {
    let opts = parse_args();

    // Debug dump of the parsed CLI options.
    println!("{opts:#?}");
    for file in &opts.files {
        // NOTE(review): for this match to typecheck, `fatal` must diverge
        // (e.g. print and exit the process) — confirm its return type is `!`.
        let content = match fs::read_to_string(file) {
            Ok(content) => content,
            Err(error) => {
                fatal(format!(
                    "failed to read {}: {:?}",
                    file.display(),
                    error.kind()
                ));
            }
        };

        println!("-- {} --", file.display());
        let mut parser = Parser::new(&content);

        // Parse a single top-level statement; print the AST on success,
        // otherwise report the fatal diagnostic.
        match parser.parse_statement() {
            Ok(ast) => println!("{ast:#?}"),
            Err(diag) => diag.report(file, &content),
        }

        // Report any non-fatal diagnostics accumulated during recovery.
        parser
            .errors
            .into_iter()
            .for_each(|diag| diag.report(file, &content));
    }
}
|
||||
|
||||
558
src/parser.rs
Normal file
558
src/parser.rs
Normal file
@@ -0,0 +1,558 @@
|
||||
//! Recursive-descent / Pratt parser that converts a token stream into an AST.
|
||||
//!
|
||||
//! The entry points are [`Parser::parse_statement`], [`Parser::parse_type`],
|
||||
//! and [`Parser::parse_expression`].
|
||||
//! Errors are represented as [`Diagnostic`] values; the caller is responsible
|
||||
//! for reporting them.
|
||||
use std::iter::Peekable;
|
||||
|
||||
use crate::ast;
|
||||
use crate::diagnostic::{Diagnostic, Severity};
|
||||
use crate::lexer::Lexer;
|
||||
use crate::token::{Token, TokenKind};
|
||||
|
||||
/// Consumes the [`Token`] stream produced by the [`Lexer`] and constructs an
/// AST in the [`ast::Parsed`] phase.
///
/// The parser uses a single token of look-ahead (peek) for all decisions.
/// Expression parsing is implemented with the
/// [Pratt / top-down operator-precedence](https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html)
/// algorithm; binding-power tables are defined by [`infix_binding_power`],
/// [`prefix_binding_power`], and [`postfix_binding_power`].
pub struct Parser<'src> {
    /// Peekable token stream; the single token of look-ahead lives here.
    tokens: Peekable<Lexer<'src>>,
    /// Diagnostics accumulated during parsing. Non-fatal errors are pushed here
    /// so that the parser can attempt to continue and surface multiple issues
    /// in a single pass.
    pub errors: Vec<Diagnostic>,
}
|
||||
|
||||
impl<'src> Parser<'src> {
|
||||
/// Constructs a new [`Parser`] with the given source text.
|
||||
pub fn new(source: &'src str) -> Self {
|
||||
Self {
|
||||
tokens: Lexer::new(source).peekable(),
|
||||
errors: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
    /// Peek at the next [`Token`] without consuming it.
    ///
    /// Tokens are `Copy`, so the peeked token is returned by value.
    fn peek(&mut self) -> Option<Token<'src>> {
        self.tokens.peek().copied()
    }
|
||||
|
||||
/// Peek at the next [`Token`], returning an [`Err`] diagnostic if the
|
||||
/// token stream is exhausted.
|
||||
fn peek_no_eof(&mut self) -> Result<Token<'src>, Diagnostic> {
|
||||
self.peek()
|
||||
.ok_or_else(|| Diagnostic::new(Severity::Error, "unexpected end of input"))
|
||||
}
|
||||
|
||||
/// Returns `true` if the next token has the given [`TokenKind`].
|
||||
fn is_peek(&mut self, kind: TokenKind) -> bool {
|
||||
self.peek().map_or(false, |tok| tok.is(kind))
|
||||
}
|
||||
|
||||
    /// Returns `true` if the token stream is exhausted.
    ///
    /// Takes `&mut self` because peeking may pull one token from the lexer.
    fn is_at_eof(&mut self) -> bool {
        self.peek().is_none()
    }
|
||||
|
||||
    /// Consumes and returns the next [`Token`].
    ///
    /// # Panics
    /// Panics if called at the end of input. Always check [`is_at_eof`](Self::is_at_eof)
    /// or use [`peek_no_eof`](Self::peek_no_eof) / [`expect`](Self::expect) in
    /// production code paths.
    fn advance(&mut self) -> Token<'src> {
        // Callers are expected to have peeked first, so running out of
        // tokens here is a parser bug — hence the expect.
        self.tokens.next().expect("failed to advance the parser")
    }
|
||||
|
||||
/// Consumes and returns the next [`Token`] if it matches `kind`; otherwise
|
||||
/// returns an [`Err`] diagnostic that points at the offending token.
|
||||
fn expect(&mut self, kind: TokenKind) -> Result<Token<'src>, Diagnostic> {
|
||||
match self.peek() {
|
||||
Some(tok) if tok.is(kind) => Ok(self.advance()),
|
||||
Some(tok) => Err(Diagnostic::new(Severity::Error, "unexpected token found")
|
||||
.with_span(tok.span)
|
||||
.add_label(
|
||||
tok.span,
|
||||
format!("expected {} but found {} instead", kind, tok.kind),
|
||||
)),
|
||||
None => Err(Diagnostic::new(Severity::Error, "unexpected end of input")),
|
||||
}
|
||||
}
|
||||
|
||||
/// Error-recovery helper: skips tokens until a statement boundary is
|
||||
/// reached so that subsequent statements can still be parsed cleanly.
|
||||
///
|
||||
/// Stops *after* consuming a `;`, or *before* consuming a `}`. This keeps
|
||||
/// nested blocks intact when recovering inside function bodies.
|
||||
fn synchronize(&mut self) {
|
||||
while let Some(peek) = self.peek() {
|
||||
match peek.kind {
|
||||
// Consume the `;` and stop
|
||||
TokenKind::Semi => {
|
||||
self.advance();
|
||||
break;
|
||||
}
|
||||
|
||||
// Stop before these
|
||||
TokenKind::RCurly => break,
|
||||
|
||||
_ => _ = self.advance(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses the next statement.
|
||||
///
|
||||
/// Dispatches to the appropriate specialised parser based on the leading
|
||||
/// token:
|
||||
/// - `let` → [`parse_let_statement`](Self::parse_let_statement)
|
||||
/// - `{` → [`parse_compound_statement`](Self::parse_compound_statement)
|
||||
/// - anything else → an expression followed by a mandatory `;`
|
||||
pub fn parse_statement(&mut self) -> Result<ast::ParsedStatement, Diagnostic> {
|
||||
let peek = self.peek_no_eof()?;
|
||||
|
||||
match peek.kind {
|
||||
TokenKind::KwLet => self.parse_let_statement(),
|
||||
TokenKind::LCurly => self.parse_compound_statement(),
|
||||
|
||||
_ => {
|
||||
let expr = self.parse_expression(0)?;
|
||||
let semi_token = self.expect(TokenKind::Semi)?;
|
||||
let span = expr.span.extend(semi_token.span);
|
||||
|
||||
Ok(ast::ParsedStatement {
|
||||
kind: ast::StatementKind::Expr(expr),
|
||||
span,
|
||||
extra: (),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses a `let` binding statement: `let <name>[: <type>] [= <expr>];`.
|
||||
///
|
||||
/// Both the type annotation and the initialiser are optional. The
|
||||
/// statement span runs from the `let` keyword through to the closing `;`.
|
||||
fn parse_let_statement(&mut self) -> Result<ast::ParsedStatement, Diagnostic> {
|
||||
let let_token = self.expect(TokenKind::KwLet)?;
|
||||
|
||||
let (name, name_span) = {
|
||||
let ident_token = self.expect(TokenKind::Identifier)?;
|
||||
(ident_token.text.to_string(), ident_token.span)
|
||||
};
|
||||
|
||||
let ty = if self.is_peek(TokenKind::Colon) {
|
||||
self.advance();
|
||||
Some(self.parse_type()?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let value = if self.is_peek(TokenKind::Assign) {
|
||||
self.advance();
|
||||
Some(self.parse_expression(0)?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let semi_token = self.expect(TokenKind::Semi)?;
|
||||
|
||||
Ok(ast::ParsedStatement {
|
||||
kind: ast::StatementKind::Let {
|
||||
name,
|
||||
name_span,
|
||||
ty,
|
||||
value,
|
||||
},
|
||||
span: let_token.span.extend(semi_token.span),
|
||||
extra: (),
|
||||
})
|
||||
}
|
||||
|
||||
/// Parses a braced block of statements: `{ <stmt>* }`.
|
||||
///
|
||||
/// Each inner statement is parsed with [`parse_statement`](Self::parse_statement).
|
||||
/// If a statement fails, the diagnostic is pushed onto [`errors`](Parser::errors)
|
||||
/// and [`synchronize`](Self::synchronize) is called so that parsing can
|
||||
/// continue with the next statement. The block span runs from `{` to `}`.
|
||||
fn parse_compound_statement(&mut self) -> Result<ast::ParsedStatement, Diagnostic> {
|
||||
let lcurly_token = self.expect(TokenKind::LCurly)?;
|
||||
let mut inner = Vec::new();
|
||||
|
||||
while !self.is_at_eof() && !self.is_peek(TokenKind::RCurly) {
|
||||
match self.parse_statement() {
|
||||
Ok(stmt) => inner.push(stmt),
|
||||
Err(diag) => {
|
||||
self.errors.push(diag);
|
||||
self.synchronize();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let rcurly_token = self.expect(TokenKind::RCurly)?;
|
||||
let span = lcurly_token.span.extend(rcurly_token.span);
|
||||
|
||||
Ok(ast::ParsedStatement {
|
||||
kind: ast::StatementKind::Compound { inner },
|
||||
span,
|
||||
extra: (),
|
||||
})
|
||||
}
|
||||
|
||||
/// Parses a type annotation, e.g. `u8`, `i64`, `bool`, or a user-defined
|
||||
/// named type.
|
||||
///
|
||||
/// Returns an [`Err`] diagnostic if the next token is not a valid type.
|
||||
pub fn parse_type(&mut self) -> Result<ast::ParsedType, Diagnostic> {
|
||||
let peek = self.peek_no_eof()?;
|
||||
|
||||
let kind = match peek.kind {
|
||||
TokenKind::TyU8 => ast::TypeKind::U8,
|
||||
TokenKind::TyU16 => ast::TypeKind::U16,
|
||||
TokenKind::TyU32 => ast::TypeKind::U32,
|
||||
TokenKind::TyU64 => ast::TypeKind::U64,
|
||||
|
||||
TokenKind::TyI8 => ast::TypeKind::I8,
|
||||
TokenKind::TyI16 => ast::TypeKind::I16,
|
||||
TokenKind::TyI32 => ast::TypeKind::I32,
|
||||
TokenKind::TyI64 => ast::TypeKind::I64,
|
||||
|
||||
TokenKind::TyBool => ast::TypeKind::Bool,
|
||||
|
||||
TokenKind::Identifier => ast::TypeKind::Named(peek.text.to_string()),
|
||||
|
||||
_ => {
|
||||
return Err(
|
||||
Diagnostic::new(Severity::Error, "expected a type").with_span(peek.span)
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
let span = self.advance().span;
|
||||
|
||||
Ok(ast::ParsedType {
|
||||
kind,
|
||||
span,
|
||||
extra: (),
|
||||
})
|
||||
}
|
||||
|
||||
    /// Parses an expression using the Pratt (top-down operator-precedence)
    /// algorithm.
    ///
    /// `min_bp` is the minimum *left* binding power the next infix/postfix
    /// operator must have to be incorporated into the current expression. Pass
    /// `0` to parse a full expression with no restrictions.
    ///
    /// The precedence hierarchy (low → high) is:
    /// - assignment (`=`)
    /// - logical `or` / `and`
    /// - bitwise `|` / `^` / `&`
    /// - equality (`==`, `!=`) and comparison (`<`, `<=`, `>`, `>=`)
    /// - addition / subtraction
    /// - shifts (`<<`, `>>`)
    /// - multiplication / division / remainder
    /// - member access (`.`)
    /// - postfix: call `()`, index `[]`, cast `as`
    /// - prefix: `-`, `&`, `~`, `*`, `!`
    pub fn parse_expression(&mut self, min_bp: u8) -> Result<ast::ParsedExpression, Diagnostic> {
        let peek_token = self.peek_no_eof()?;

        // Left-hand side: either a prefix operator applied to a
        // sub-expression, or a primary expression.
        let mut left = if let Some((right_bp, op)) = prefix_binding_power(peek_token.kind) {
            let op_span = self.advance().span;
            let operand = Box::new(self.parse_expression(right_bp)?);

            let span = op_span.extend(operand.span);

            ast::ParsedExpression {
                kind: ast::ExpressionKind::Unary {
                    op,
                    op_span,
                    operand,
                },
                span,
                extra: (),
            }
        } else {
            self.parse_primary_expression()?
        };

        // Fold postfix and infix operators into `left` while their binding
        // power is at least `min_bp`.
        while let Some(peek_token) = self.peek() {
            // Postfix operators are checked first so they always apply to
            // the nearest sub-expression.
            if let Some(left_bp) = postfix_binding_power(peek_token.kind) {
                if left_bp < min_bp {
                    break;
                }

                left = match peek_token.kind {
                    TokenKind::LParen => self.parse_call_expr(left)?,
                    TokenKind::LBracket => self.parse_index_expr(left)?,
                    TokenKind::KwAs => self.parse_cast_expr(left)?,

                    // `postfix_binding_power` only returns `Some` for the
                    // three kinds handled above.
                    _ => unreachable!(),
                };

                continue;
            }

            if let Some((left_bp, right_bp, op)) = infix_binding_power(peek_token.kind) {
                if left_bp < min_bp {
                    break;
                }

                let op_span = self.advance().span;

                // Recurse with the operator's *right* binding power — this
                // is what implements associativity.
                let right = self.parse_expression(right_bp)?;
                let span = left.span.extend(right.span);

                left = ast::ParsedExpression {
                    kind: ast::ExpressionKind::Binary {
                        op,
                        op_span,
                        left: Box::new(left),
                        right: Box::new(right),
                    },
                    span,
                    extra: (),
                };

                continue;
            }

            // The next token is not an operator: the expression ends here.
            break;
        }

        Ok(left)
    }
|
||||
|
||||
/// Parses a primary (non-operator) expression: an identifier, integer
|
||||
/// literal, boolean literal, or a parenthesised expression.
|
||||
///
|
||||
/// Integer literals support `0x` (hex), `0o` (octal), and `0b` (binary)
|
||||
/// prefixes in addition to plain decimal.
|
||||
fn parse_primary_expression(&mut self) -> Result<ast::ParsedExpression, Diagnostic> {
|
||||
let peek_token = self.peek_no_eof()?;
|
||||
|
||||
match peek_token.kind {
|
||||
TokenKind::Identifier => {
|
||||
let name = self.advance().text.to_string();
|
||||
|
||||
Ok(ast::ParsedExpression {
|
||||
kind: ast::ExpressionKind::Identifier(name),
|
||||
span: peek_token.span,
|
||||
extra: (),
|
||||
})
|
||||
}
|
||||
TokenKind::LitInt => {
|
||||
let tok = self.advance();
|
||||
|
||||
let (radix, src) = [("0x", 16), ("0o", 8), ("0b", 2)]
|
||||
.into_iter()
|
||||
.find_map(|(prefix, radix)| {
|
||||
tok.text.strip_prefix(prefix).map(|text| (radix, text))
|
||||
})
|
||||
.unwrap_or((10, tok.text));
|
||||
|
||||
let value = u64::from_str_radix(src, radix).map_err(|_| {
|
||||
Diagnostic::new(Severity::Error, "invalid integer literal")
|
||||
.with_span(tok.span)
|
||||
.add_label(tok.span, "this is an invalid integer literal")
|
||||
})?;
|
||||
|
||||
Ok(ast::ParsedExpression {
|
||||
kind: ast::ExpressionKind::LitInteger(value),
|
||||
span: tok.span,
|
||||
extra: (),
|
||||
})
|
||||
}
|
||||
TokenKind::LitBool => {
|
||||
let value = self.advance().text == "true";
|
||||
|
||||
Ok(ast::ParsedExpression {
|
||||
kind: ast::ExpressionKind::LitBool(value),
|
||||
span: peek_token.span,
|
||||
extra: (),
|
||||
})
|
||||
}
|
||||
TokenKind::LParen => {
|
||||
let open_paren = self.advance();
|
||||
let inner = self.parse_expression(0)?;
|
||||
let close_paren = self.expect(TokenKind::RParen)?;
|
||||
|
||||
Ok(ast::ParsedExpression {
|
||||
kind: inner.kind,
|
||||
span: open_paren.span.extend(close_paren.span),
|
||||
extra: (),
|
||||
})
|
||||
}
|
||||
_ => Err(Diagnostic::new(
|
||||
Severity::Error,
|
||||
format!(
|
||||
"expected one of {}, {} or {} but found {} instead",
|
||||
TokenKind::Identifier,
|
||||
TokenKind::LitInt,
|
||||
TokenKind::LitBool,
|
||||
peek_token.kind
|
||||
),
|
||||
)
|
||||
.with_span(peek_token.span)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses a function-call expression `func(arg, …)`.
|
||||
///
|
||||
/// The opening `(` is consumed here; `func` is the already-parsed callee
|
||||
/// expression passed in from the Pratt loop.
|
||||
fn parse_call_expr(
|
||||
&mut self,
|
||||
func: ast::ParsedExpression,
|
||||
) -> Result<ast::ParsedExpression, Diagnostic> {
|
||||
self.expect(TokenKind::LParen)?;
|
||||
let mut args = Vec::new();
|
||||
|
||||
while !self.is_at_eof() && !self.is_peek(TokenKind::RParen) {
|
||||
if !args.is_empty() {
|
||||
self.expect(TokenKind::Comma)?;
|
||||
}
|
||||
|
||||
args.push(self.parse_expression(0)?);
|
||||
}
|
||||
|
||||
let rparen_token = self.expect(TokenKind::RParen)?;
|
||||
let span = func.span.extend(rparen_token.span);
|
||||
|
||||
Ok(ast::ParsedExpression {
|
||||
kind: ast::ExpressionKind::Call {
|
||||
func: Box::new(func),
|
||||
args,
|
||||
},
|
||||
span,
|
||||
extra: (),
|
||||
})
|
||||
}
|
||||
|
||||
/// Parses an index expression `expr[index]`.
|
||||
///
|
||||
/// The opening `[` is consumed here; `expr` is the already-parsed
|
||||
/// collection expression passed in from the Pratt loop.
|
||||
fn parse_index_expr(
|
||||
&mut self,
|
||||
expr: ast::ParsedExpression,
|
||||
) -> Result<ast::ParsedExpression, Diagnostic> {
|
||||
self.expect(TokenKind::LBracket)?;
|
||||
|
||||
let index = self.parse_expression(0)?;
|
||||
|
||||
let rbracket_token = self.expect(TokenKind::RBracket)?;
|
||||
let span = expr.span.extend(rbracket_token.span);
|
||||
|
||||
Ok(ast::ParsedExpression {
|
||||
kind: ast::ExpressionKind::Index {
|
||||
expr: Box::new(expr),
|
||||
index: Box::new(index),
|
||||
},
|
||||
span,
|
||||
extra: (),
|
||||
})
|
||||
}
|
||||
|
||||
/// Parses a cast expression `expr as Type`.
|
||||
///
|
||||
/// The `as` keyword is consumed here; `expr` is the already-parsed value
|
||||
/// expression passed in from the Pratt loop.
|
||||
fn parse_cast_expr(
|
||||
&mut self,
|
||||
expr: ast::ParsedExpression,
|
||||
) -> Result<ast::ParsedExpression, Diagnostic> {
|
||||
self.expect(TokenKind::KwAs)?;
|
||||
|
||||
let ty = self.parse_type()?;
|
||||
let span = expr.span.extend(ty.span);
|
||||
|
||||
Ok(ast::ParsedExpression {
|
||||
kind: ast::ExpressionKind::Cast {
|
||||
expr: Box::new(expr),
|
||||
ty: Box::new(ty),
|
||||
},
|
||||
span,
|
||||
extra: (),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `(left_bp, right_bp, op)` for infix operators, or `None` if `kind`
/// is not an infix operator.
///
/// The two binding-power values implement associativity: `right_bp = left_bp + 1`
/// gives left-associativity, while equal values (`right_bp == left_bp`) give
/// right-associativity (currently used for `=`) — the Pratt loop only breaks
/// when the next operator's `left_bp` is strictly below `min_bp`.
fn infix_binding_power(kind: TokenKind) -> Option<(u8, u8, ast::BinaryOp)> {
    Some(match kind {
        // Right-associative: equal binding powers.
        TokenKind::Assign => (2, 2, ast::BinaryOp::Assign),

        TokenKind::KwOr => (10, 11, ast::BinaryOp::Or),
        TokenKind::KwAnd => (20, 21, ast::BinaryOp::And),

        TokenKind::Pipe => (30, 31, ast::BinaryOp::BitOr),
        TokenKind::Caret => (40, 41, ast::BinaryOp::BitXor),
        TokenKind::Amp => (50, 51, ast::BinaryOp::BitAnd),

        TokenKind::Eq => (55, 56, ast::BinaryOp::Eq),
        TokenKind::Ne => (55, 56, ast::BinaryOp::Ne),

        TokenKind::Lt => (57, 58, ast::BinaryOp::Lt),
        TokenKind::Le => (57, 58, ast::BinaryOp::Le),
        TokenKind::Gt => (57, 58, ast::BinaryOp::Gt),
        TokenKind::Ge => (57, 58, ast::BinaryOp::Ge),

        TokenKind::Plus => (60, 61, ast::BinaryOp::Add),
        TokenKind::Minus => (60, 61, ast::BinaryOp::Sub),

        TokenKind::Shl => (65, 66, ast::BinaryOp::BitShl),
        TokenKind::Shr => (65, 66, ast::BinaryOp::BitShr),

        TokenKind::Star => (70, 71, ast::BinaryOp::Mul),
        TokenKind::Slash => (70, 71, ast::BinaryOp::Div),
        TokenKind::Percent => (70, 71, ast::BinaryOp::Rem),

        TokenKind::Dot => (100, 101, ast::BinaryOp::Dot),

        _ => return None,
    })
}
|
||||
|
||||
/// Returns `(right_bp, op)` for prefix operators, or `None` if `kind` is not
|
||||
/// a prefix operator.
|
||||
///
|
||||
/// All prefix operators currently share the same binding power (`80`), giving
|
||||
/// them higher precedence than any binary operator.
|
||||
fn prefix_binding_power(kind: TokenKind) -> Option<(u8, ast::UnaryOp)> {
|
||||
Some(match kind {
|
||||
TokenKind::Minus => (80, ast::UnaryOp::Neg),
|
||||
TokenKind::Amp => (80, ast::UnaryOp::AddrOf),
|
||||
TokenKind::Tilde => (80, ast::UnaryOp::BitNot),
|
||||
TokenKind::Star => (80, ast::UnaryOp::Deref),
|
||||
TokenKind::Bang => (80, ast::UnaryOp::Not),
|
||||
|
||||
_ => return None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns the *left* binding power for postfix operators, or `None` if `kind`
|
||||
/// is not a postfix operator.
|
||||
///
|
||||
/// Postfix operators (`()`, `[]`, `as`) bind tighter than all binary operators
|
||||
/// but are checked before prefix operators in the Pratt loop so they always
|
||||
/// apply to the nearest sub-expression.
|
||||
fn postfix_binding_power(kind: TokenKind) -> Option<u8> {
|
||||
Some(match kind {
|
||||
TokenKind::LParen => 100,
|
||||
TokenKind::LBracket => 100,
|
||||
TokenKind::KwAs => 90,
|
||||
|
||||
_ => return None,
|
||||
})
|
||||
}
|
||||
179
src/token.rs
Normal file
179
src/token.rs
Normal file
@@ -0,0 +1,179 @@
|
||||
//! Token definitions used by the [`Lexer`](crate::lexer::Lexer) and
|
||||
//! [`Parser`](crate::parser::Parser).
|
||||
//!
|
||||
//! The two core types are:
|
||||
//! - [`Span`] — a half-open byte range that marks a location in source text.
|
||||
//! - [`Token`] — a classified slice of source text together with its span.
|
||||
//!
|
||||
//! [`TokenKind`] enumerates every token variant; its [`Display`](std::fmt::Display)
|
||||
//! impl produces the human-readable representation used in diagnostics.
|
||||
use std::fmt;
|
||||
|
||||
/// A half-open byte range `[start, end)` that marks a location in the source
/// string.
///
/// Offsets are stored as [`u32`], which caps supported source files at
/// 4 GiB — far more than any practical source file needs.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Span {
    /// Inclusive start byte offset.
    pub start: u32,
    /// Exclusive end byte offset.
    pub end: u32,
}

impl Span {
    /// Creates a new span covering `[start, end)`.
    pub const fn new(start: u32, end: u32) -> Self {
        Self { start, end }
    }

    /// Returns the length of the span in bytes.
    ///
    /// An inverted span (start past end) reports length `0` rather than
    /// wrapping around.
    pub fn len(&self) -> u32 {
        if self.end > self.start {
            self.end - self.start
        } else {
            0
        }
    }

    /// Returns `true` if the span covers zero bytes (`start == end`).
    pub fn is_empty(&self) -> bool {
        self.start == self.end
    }

    /// Returns the smallest span that covers both `self` and `other` — the
    /// union of the two ranges, handy for deriving a parent node's span from
    /// its children.
    pub fn extend(self, other: Self) -> Self {
        Self::new(self.start.min(other.start), self.end.max(other.end))
    }
}

impl fmt::Display for Span {
    /// Renders the span as `start..end`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { start, end } = self;
        write!(f, "{start}..{end}")
    }
}
|
||||
|
||||
/// Simultaneously defines the [`TokenKind`] enum and its [`fmt::Display`] impl.
///
/// Each arm maps a variant name to the human-readable string used in
/// diagnostics (e.g. `` `+` ``, `identifier`). Keeping both in one macro
/// guarantees the enum and its display strings can never drift apart.
macro_rules! define_tokens {
    ($($name:ident => $repr:literal),* $(,)?) => {
        #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
        pub enum TokenKind {
            $($name),*
        }

        impl fmt::Display for TokenKind {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                f.write_str(match self {
                    $(TokenKind::$name => $repr),*
                })
            }
        }
    };
}

// The single source of truth for every token kind and its diagnostic name.
define_tokens! {
    // -- Identifier --
    Identifier => "identifier",

    // -- Literals --
    LitInt => "integer literal",
    LitBool => "boolean literal",
    LitString => "string literal",

    // -- Keywords --
    KwAnd => "`and`",
    KwOr => "`or`",
    KwAs => "`as`",
    KwLet => "`let`",

    // -- Type Keywords --
    TyU8 => "`u8`",
    TyU16 => "`u16`",
    TyU32 => "`u32`",
    TyU64 => "`u64`",
    TyI8 => "`i8`",
    TyI16 => "`i16`",
    TyI32 => "`i32`",
    TyI64 => "`i64`",
    TyBool => "`bool`",

    // -- Arithmetic Operators --
    Plus => "`+`",
    Minus => "`-`",
    Star => "`*`",
    Slash => "`/`",
    Percent => "`%`",

    // -- Bitwise / Logical Operators --
    Amp => "`&`",
    Pipe => "`|`",
    Caret => "`^`",
    Tilde => "`~`",
    Shl => "`<<`",
    Shr => "`>>`",
    Bang => "`!`",

    // -- Comparison Operators --
    Eq => "`==`",
    Ne => "`!=`",
    Lt => "`<`",
    Le => "`<=`",
    Gt => "`>`",
    Ge => "`>=`",

    // -- Punctuation --
    Assign => "`=`",
    Arrow => "`->`",
    Dot => "`.`",
    Colon => "`:`",
    Comma => "`,`",
    Semi => "`;`",

    // -- Delimiters --
    LParen => "`(`",
    RParen => "`)`",
    LBracket => "`[`",
    RBracket => "`]`",
    LCurly => "`{`",
    RCurly => "`}`",

    // -- Special --
    Unknown => "unknown character"
}
|
||||
|
||||
/// The smallest contiguous unit of source text, as produced by the
/// [`Lexer`](crate::lexer::Lexer).
///
/// A token borrows its [`text`](Token::text) slice directly from the original
/// source string, so the lifetime `'src` ties every token to that source.
/// Tokens are small and `Copy`, so they are passed around by value.
#[derive(Debug, Clone, Copy)]
pub struct Token<'src> {
    /// The syntactic category of this token.
    pub kind: TokenKind,
    /// The byte range in the source string where this token appears.
    pub span: Span,
    /// The raw source text of this token (a zero-copy slice).
    pub text: &'src str,
}
|
||||
|
||||
impl<'src> Token<'src> {
|
||||
/// Returns `true` if this token has the given [`TokenKind`].
|
||||
pub fn is(&self, kind: TokenKind) -> bool {
|
||||
self.kind == kind
|
||||
}
|
||||
}
|
||||
|
||||
impl<'src> fmt::Display for Token<'src> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "{:?} `{}` @ {}", self.kind, self.text, self.span)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user