Compare commits
7 Commits
1f6f876f58
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| cde0ff5582 | |||
| 1107c7d93d | |||
| bb9cb8d2d1 | |||
| 4e2df32e36 | |||
| 93f08d1944 | |||
| 9ac8a79151 | |||
| 51bd07d313 |
7
Cargo.lock
generated
7
Cargo.lock
generated
@@ -6,9 +6,16 @@ version = 4
|
||||
name = "buckyc"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"unicode-xid",
|
||||
"yansi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-xid"
|
||||
version = "0.2.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
|
||||
|
||||
[[package]]
|
||||
name = "yansi"
|
||||
version = "1.0.1"
|
||||
|
||||
@@ -4,4 +4,5 @@ version = "0.1.0"
|
||||
edition = "2024"
|
||||
|
||||
[dependencies]
|
||||
unicode-xid = "0.2.6"
|
||||
yansi = "1.0.1"
|
||||
|
||||
27
README.md
Normal file
27
README.md
Normal file
@@ -0,0 +1,27 @@
|
||||
<div align="center">
|
||||
<img src="logo.svg" alt="Bucky Logo" width="200" height="200">
|
||||
|
||||
# Bucky
|
||||
|
||||
**Bucky** is a compiled, statically typed programming language with a Rust-inspired syntax, designed for performance, safety, and expressiveness.
|
||||
</div>
|
||||
|
||||
## Features
|
||||
|
||||
- **Compiled**: Produces native binaries for high performance
|
||||
- **Statically Typed**: Strong type checking at compile time
|
||||
- **Rust-Inspired Syntax**: Familiar, expressive, and modern
|
||||
- **Explicit and Predictable**: Emphasizes clarity and correctness
|
||||
- **Safety-Oriented Design**: Encourages writing robust code by default
|
||||
|
||||
## Examples
|
||||
|
||||
```bucky
|
||||
fn fib(n: u64): u64 {
|
||||
if n < 2 {
|
||||
return n;
|
||||
}
|
||||
|
||||
return fib(n - 1) + fib(n - 2);
|
||||
}
|
||||
```
|
||||
7
examples/fibonacci.bky
Normal file
7
examples/fibonacci.bky
Normal file
@@ -0,0 +1,7 @@
|
||||
fn fib(n: u64) -> u64 {
|
||||
if n < 2 {
|
||||
return n;
|
||||
}
|
||||
|
||||
return fib(n - 1) + fib(n - 2);
|
||||
}
|
||||
5
examples/hello-world.bky
Normal file
5
examples/hello-world.bky
Normal file
@@ -0,0 +1,5 @@
|
||||
extern puts(text: *char);
|
||||
|
||||
fn main() {
|
||||
puts("Hello, World!");
|
||||
}
|
||||
33
logo.svg
Normal file
33
logo.svg
Normal file
@@ -0,0 +1,33 @@
|
||||
<svg width="512" height="512" viewBox="0 0 512 512" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||
<circle cx="256" cy="256" r="256" fill="#F7F5F0"/>
|
||||
|
||||
<defs>
|
||||
<linearGradient id="gradientWarm" x1="100" y1="400" x2="200" y2="100" gradientUnits="userSpaceOnUse">
|
||||
<stop offset="0%" stop-color="#D97042" /> <stop offset="100%" stop-color="#E89F57" /> </linearGradient>
|
||||
|
||||
<linearGradient id="gradientCool" x1="412" y1="400" x2="312" y2="100" gradientUnits="userSpaceOnUse">
|
||||
<stop offset="0%" stop-color="#2D4F56" /> <stop offset="100%" stop-color="#4AA096" /> </linearGradient>
|
||||
|
||||
<filter id="softShadow" x="-20%" y="-20%" width="140%" height="140%">
|
||||
<feDropShadow dx="0" dy="4" stdDeviation="6" flood-color="#000" flood-opacity="0.1"/>
|
||||
</filter>
|
||||
</defs>
|
||||
|
||||
<g filter="url(#softShadow)">
|
||||
|
||||
<path d="M 235 410 C 235 410, 160 360, 145 220 C 140 170, 155 130, 155 130"
|
||||
stroke="url(#gradientWarm)" stroke-width="32" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
<path d="M 178 340 C 195 340, 110 310, 82 250"
|
||||
stroke="url(#gradientWarm)" stroke-width="28" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
<path d="M 160 270 C 160 270, 200 230, 210 180"
|
||||
stroke="url(#gradientWarm)" stroke-width="26" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
|
||||
<path d="M 277 410 C 277 410, 352 360, 367 220 C 372 170, 357 130, 357 130"
|
||||
stroke="url(#gradientCool)" stroke-width="32" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
<path d="M 335 340 C 317 340, 402 310, 430 250"
|
||||
stroke="url(#gradientCool)" stroke-width="28" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
<path d="M 352 270 C 352 270, 312 230, 302 180"
|
||||
stroke="url(#gradientCool)" stroke-width="26" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
|
||||
</g>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 1.8 KiB |
251
src/ast.rs
Normal file
251
src/ast.rs
Normal file
@@ -0,0 +1,251 @@
|
||||
//! Abstract Syntax Tree (AST) definitions.
|
||||
//!
|
||||
//! The AST is parameterised over a [`Phase`] type-state so that the same node
|
||||
//! types can carry different amounts of information at different compiler
|
||||
//! stages. Currently only the [`Parsed`] phase exists, which attaches no
|
||||
//! extra data (`()`) to each node.
|
||||
//!
|
||||
//! The primary node families are:
|
||||
//! - [`Expression`] / [`ExpressionKind`] — value-producing constructs.
|
||||
//! - [`Type`] / [`TypeKind`] — type annotations.
|
||||
//! - [`Statement`] / [`StatementKind`] — top-level and block-level statements.
|
||||
use std::fmt::Debug;
|
||||
|
||||
use crate::token::Span;
|
||||
|
||||
/// Marker trait that carries phase-specific associated data for AST nodes.
|
||||
///
|
||||
/// Each phase defines an [`ExtraData`](Phase::ExtraData) type that is embedded
|
||||
/// in every node. This allows later compiler passes (e.g. type-checking) to
|
||||
/// augment the tree without duplicating the node hierarchy.
|
||||
///
|
||||
/// Current phases:
|
||||
/// - [`Parsed`] — produced directly by the parser; no extra data.
|
||||
pub trait Phase {
|
||||
type ExtraData: PartialEq + Debug;
|
||||
}
|
||||
|
||||
/// The initial AST phase produced by the parser.
|
||||
///
|
||||
/// In this phase [`Phase::ExtraData`] is `()`, meaning nodes carry only
|
||||
/// syntactic information (kind + source span).
|
||||
#[derive(Debug)]
|
||||
pub struct Parsed;
|
||||
|
||||
impl Phase for Parsed {
|
||||
type ExtraData = ();
|
||||
}
|
||||
|
||||
/// Convenience alias for an [`Expression`] in the [`Parsed`] phase.
|
||||
pub type ParsedExpression = Expression<Parsed>;
|
||||
|
||||
/// A value-producing node in the AST.
|
||||
///
|
||||
/// Every expression carries:
|
||||
/// - [`kind`](Expression::kind) — what *kind* of expression it is.
|
||||
/// - [`span`](Expression::span) — the source location it was parsed from.
|
||||
/// - [`extra`](Expression::extra) — phase-specific data (see [`Phase`]).
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct Expression<P: Phase> {
|
||||
pub kind: ExpressionKind<P>,
|
||||
pub span: Span,
|
||||
pub extra: P::ExtraData,
|
||||
}
|
||||
|
||||
/// The concrete variant of an [`Expression`].
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum ExpressionKind<P: Phase> {
|
||||
/// A bare name, e.g. `foo`.
|
||||
Identifier(String),
|
||||
|
||||
/// A string literal, e.g. `"hello"`.
|
||||
LitString(String),
|
||||
/// An integer literal, e.g. `42`, `0xFF`, `0b1010`. The value is stored
|
||||
/// as a `u64` regardless of the source radix.
|
||||
LitInteger(u64),
|
||||
/// A boolean literal: `true` or `false`.
|
||||
LitBool(bool),
|
||||
|
||||
/// A prefix unary expression, e.g. `-x`, `!cond`, `*ptr`.
|
||||
Unary {
|
||||
op: UnaryOp,
|
||||
/// Source span of the operator token itself.
|
||||
op_span: Span,
|
||||
operand: Box<Expression<P>>,
|
||||
},
|
||||
|
||||
/// An infix binary expression, e.g. `a + b`, `x == y`.
|
||||
Binary {
|
||||
op: BinaryOp,
|
||||
/// Source span of the operator token itself.
|
||||
op_span: Span,
|
||||
left: Box<Expression<P>>,
|
||||
right: Box<Expression<P>>,
|
||||
},
|
||||
|
||||
/// A function call, e.g. `f(a, b)`.
|
||||
Call {
|
||||
/// The callee expression (often an [`Identifier`](ExpressionKind::Identifier)).
|
||||
func: Box<Expression<P>>,
|
||||
args: Vec<Expression<P>>,
|
||||
},
|
||||
|
||||
/// An index expression, e.g. `arr[i]`.
|
||||
Index {
|
||||
expr: Box<Expression<P>>,
|
||||
index: Box<Expression<P>>,
|
||||
},
|
||||
|
||||
/// A type-cast expression, e.g. `x as u32`.
|
||||
Cast {
|
||||
expr: Box<Expression<P>>,
|
||||
ty: Box<Type<P>>,
|
||||
},
|
||||
}
|
||||
|
||||
/// A prefix unary operator.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum UnaryOp {
|
||||
/// Bitwise complement (`~`)
|
||||
BitNot,
|
||||
/// Logical negation (`!`)
|
||||
Not,
|
||||
/// Arithmetic negation (`-`)
|
||||
Neg,
|
||||
/// Address-of (`&`)
|
||||
AddrOf,
|
||||
/// Pointer dereference (`*`)
|
||||
Deref,
|
||||
}
|
||||
|
||||
/// An infix binary operator.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum BinaryOp {
|
||||
/// Addition (`+`)
|
||||
Add,
|
||||
/// Subtraction (`-`)
|
||||
Sub,
|
||||
/// Multiplication (`*`)
|
||||
Mul,
|
||||
/// Division (`/`)
|
||||
Div,
|
||||
/// Remainder (`%`)
|
||||
Rem,
|
||||
|
||||
/// Bitwise AND (`&`)
|
||||
BitAnd,
|
||||
/// Bitwise OR (`|`)
|
||||
BitOr,
|
||||
/// Bitwise XOR (`^`)
|
||||
BitXor,
|
||||
/// Left shift (`<<`)
|
||||
BitShl,
|
||||
/// Right shift (`>>`)
|
||||
BitShr,
|
||||
|
||||
/// Logical AND (`and`)
|
||||
And,
|
||||
/// Logical OR (`or`)
|
||||
Or,
|
||||
|
||||
/// Equality (`==`)
|
||||
Eq,
|
||||
/// Inequality (`!=`)
|
||||
Ne,
|
||||
/// Less-than (`<`)
|
||||
Lt,
|
||||
/// Less-than-or-equal (`<=`)
|
||||
Le,
|
||||
/// Greater-than (`>`)
|
||||
Gt,
|
||||
/// Greater-than-or-equal (`>=`)
|
||||
Ge,
|
||||
|
||||
/// Assignment (`=`)
|
||||
Assign,
|
||||
/// Member access (`.`)
|
||||
Dot,
|
||||
}
|
||||
|
||||
/// Convenience alias for a [`Type`] in the [`Parsed`] phase.
|
||||
pub type ParsedType = Type<Parsed>;
|
||||
|
||||
/// A type annotation node in the AST.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct Type<P: Phase> {
|
||||
pub kind: TypeKind,
|
||||
pub span: Span,
|
||||
pub extra: P::ExtraData,
|
||||
}
|
||||
|
||||
/// The concrete variant of a [`Type`] annotation.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum TypeKind {
|
||||
/// Signed integers
|
||||
I8,
|
||||
I16,
|
||||
I32,
|
||||
I64,
|
||||
|
||||
/// Unsigned integers
|
||||
U8,
|
||||
U16,
|
||||
U32,
|
||||
U64,
|
||||
|
||||
/// Boolean type (`bool`)
|
||||
Bool,
|
||||
|
||||
/// A user-defined named type, e.g. `MyStruct`.
|
||||
Named(String),
|
||||
}
|
||||
|
||||
/// Convenience alias for a [`Statement`] in the [`Parsed`] phase.
|
||||
pub type ParsedStatement = Statement<Parsed>;
|
||||
|
||||
/// A statement node in the AST.
|
||||
///
|
||||
/// Statements are the sequential building blocks of a block body. Like
|
||||
/// [`Expression`] and [`Type`], a statement is parameterised over a [`Phase`]
|
||||
/// so that later compiler passes can attach additional information without
|
||||
/// changing the node layout.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct Statement<P: Phase> {
|
||||
pub kind: StatementKind<P>,
|
||||
pub span: Span,
|
||||
pub extra: P::ExtraData,
|
||||
}
|
||||
|
||||
/// The concrete variant of a [`Statement`].
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum StatementKind<P: Phase> {
|
||||
/// A `let` binding, e.g. `let x: i32 = 0;`.
|
||||
///
|
||||
/// Both the type annotation and the initialiser are optional at the parse
|
||||
/// stage and may be filled in or validated by later passes.
|
||||
Let {
|
||||
/// The name of the binding.
|
||||
name: String,
|
||||
/// Source span of the name token, used for diagnostics.
|
||||
name_span: Span,
|
||||
/// Optional explicit type annotation (`let x: T`).
|
||||
ty: Option<Type<P>>,
|
||||
/// Optional initialiser expression (`= <expr>`).
|
||||
value: Option<Expression<P>>,
|
||||
},
|
||||
|
||||
/// A braced block of statements, e.g. `{ let x = 1; f(x); }`.
|
||||
///
|
||||
/// Compound statements introduce a new scope and can appear anywhere a
|
||||
/// statement is expected.
|
||||
Compound {
|
||||
/// The statements contained within the block, in source order.
|
||||
inner: Vec<Statement<P>>,
|
||||
},
|
||||
|
||||
/// A bare expression statement, e.g. `f(x);`.
|
||||
///
|
||||
/// The trailing `;` is not stored in the node but is included in
|
||||
/// [`Statement::span`].
|
||||
Expr(Expression<P>),
|
||||
}
|
||||
46
src/cli.rs
46
src/cli.rs
@@ -1,7 +1,18 @@
|
||||
//! Command-line interface: argument parsing, help/version output, and fatal
|
||||
//! error reporting.
|
||||
//!
|
||||
//! The primary entry point is [`parse_args`], which parses [`std::env::args`]
|
||||
//! and returns an [`Opts`] struct. If any argument is invalid or required
|
||||
//! arguments are missing, it calls [`fatal`] which prints an error to `stderr`
|
||||
//! and exits with code `1`.
|
||||
use std::path::PathBuf;
|
||||
|
||||
use yansi::Paint;
|
||||
|
||||
/// Print the help message to `stdout`.
|
||||
///
|
||||
/// Describes the compiler's usage, all supported options, and the `<file>`
|
||||
/// positional argument.
|
||||
pub fn print_help() {
|
||||
println!(
|
||||
"{} {} - the bucky language compiler",
|
||||
@@ -42,32 +53,57 @@ pub fn print_help() {
|
||||
println!();
|
||||
println!("{}", "ARGS:".bold().yellow());
|
||||
println!(
|
||||
" {} One or more Flux source files to compile",
|
||||
" {} One or more source files to compile",
|
||||
"<file>".bold(),
|
||||
);
|
||||
}
|
||||
|
||||
/// Print the compiler version string (`buckyc <version>`) to `stdout`.
|
||||
pub fn print_version() {
|
||||
println!("buckyc {}", env!("CARGO_PKG_VERSION"));
|
||||
}
|
||||
|
||||
/// Print a formatted error message to `stderr` and exit with code `1`.
|
||||
///
|
||||
/// This function never returns (`-> !`). Use it for unrecoverable CLI errors
|
||||
/// such as missing arguments or unknown flags, discovered before compilation
|
||||
/// begins.
|
||||
pub fn fatal(message: impl ToString) -> ! {
|
||||
eprintln!("{}: {}", "error".bold().red(), message.to_string().bold());
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
/// Parsed command-line options returned by [`parse_args`].
|
||||
#[derive(Debug)]
|
||||
pub struct Opts {
|
||||
/// The list of files passed to the compiler.
|
||||
/// One or more source files to compile, in the order they were supplied.
|
||||
pub files: Vec<PathBuf>,
|
||||
/// `-S`: emit IR and stop (implies `-c`).
|
||||
/// `-S`: emit IR and stop (implies [`no_link`](Opts::no_link)).
|
||||
pub emit_ir: bool,
|
||||
/// `-c`: compile source to object file without linking.
|
||||
/// `-c`: compile to an object file without invoking the linker.
|
||||
pub no_link: bool,
|
||||
/// `-o <file>`: write final output to this path.
|
||||
/// `-o <file>`: destination path for the final output. When `None` the
|
||||
/// compiler chooses a default output name.
|
||||
pub output: Option<PathBuf>,
|
||||
}
|
||||
|
||||
/// Parse [`std::env::args`] and return the resulting [`Opts`].
|
||||
///
|
||||
/// Recognised flags:
|
||||
///
|
||||
/// | Flag | Effect |
|
||||
/// |------|--------|
|
||||
/// | `-h`, `--help` | Print help and exit `0` |
|
||||
/// | `-V`, `--version` | Print version and exit `0` |
|
||||
/// | `-S` | Set [`emit_ir`](Opts::emit_ir) and [`no_link`](Opts::no_link) |
|
||||
/// | `-c` | Set [`no_link`](Opts::no_link) |
|
||||
/// | `-o <file>` | Set [`output`](Opts::output) |
|
||||
/// | `<file>` | Append to [`files`](Opts::files) |
|
||||
///
|
||||
/// Calls [`fatal`] (and exits) if:
|
||||
/// - an unknown `-`-prefixed flag is encountered, or
|
||||
/// - `-o` is supplied without a following argument, or
|
||||
/// - no source files are provided.
|
||||
pub fn parse_args() -> Opts {
|
||||
let mut files = Vec::new();
|
||||
let mut no_link = false;
|
||||
|
||||
253
src/diagnostic.rs
Normal file
253
src/diagnostic.rs
Normal file
@@ -0,0 +1,253 @@
|
||||
//! Compiler diagnostic reporting with source-location context.
|
||||
//!
|
||||
//! This module provides [`Diagnostic`], a structured error/warning message that
|
||||
//! can optionally include a source span and one or more labelled secondary
|
||||
//! spans. Diagnostics are rendered to `stderr` in a rustc-inspired format:
|
||||
//!
|
||||
//! ```text
|
||||
//! Error: undeclared variable `x`
|
||||
//! --> src/main.bky:3:5
|
||||
//! |
|
||||
//! 3 | let y = x + 1;
|
||||
//! | ^ undeclared variable
|
||||
//! |
|
||||
//! ```
|
||||
use std::{fmt::Display, path::Path, process::exit};
|
||||
|
||||
use yansi::Paint;
|
||||
|
||||
use crate::token::Span;
|
||||
|
||||
/// The importance level of a [`Diagnostic`].
|
||||
///
|
||||
/// Variants are ordered from least to most severe so that `<` / `>` comparisons
|
||||
/// work intuitively (e.g. `Severity::Warning < Severity::Error`).
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum Severity {
|
||||
/// Purely informational; never causes the compiler to stop.
|
||||
Note,
|
||||
/// Something suspicious that may or may not be a problem.
|
||||
Warning,
|
||||
/// A recoverable problem that prevents successful compilation.
|
||||
Error,
|
||||
/// An unrecoverable problem; the process will exit immediately after
|
||||
/// reporting this diagnostic.
|
||||
Critical,
|
||||
}
|
||||
|
||||
impl Display for Severity {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Severity::Note => write!(f, "{}", "Note".bold().cyan()),
|
||||
Severity::Warning => write!(f, "{}", "Warning".bold().yellow()),
|
||||
Severity::Error => write!(f, "{}", "Error".bold().red()),
|
||||
Severity::Critical => write!(f, "{}", "Critical".bold().magenta()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A single compiler message with optional source-location information.
|
||||
///
|
||||
/// Build a diagnostic with [`Diagnostic::new`], optionally attach a primary
|
||||
/// source location via [`with_span`](Diagnostic::with_span), attach labelled
|
||||
/// secondary locations via [`add_label`](Diagnostic::add_label), then call
|
||||
/// [`report`](Diagnostic::report) to print it.
|
||||
///
|
||||
/// If the severity is [`Severity::Critical`], `report` will call
|
||||
/// [`process::exit`](std::process::exit) after printing.
|
||||
pub struct Diagnostic {
|
||||
pub severity: Severity,
|
||||
/// Primary source location, if any.
|
||||
pub span: Option<Span>,
|
||||
pub message: String,
|
||||
/// Secondary labelled spans rendered below the primary snippet.
|
||||
pub labels: Vec<(Span, String)>,
|
||||
}
|
||||
|
||||
impl Diagnostic {
|
||||
/// Create a new diagnostic with the given severity and message.
|
||||
///
|
||||
/// No source location is attached; use [`with_span`](Self::with_span) to
|
||||
/// add one.
|
||||
pub fn new(severity: Severity, message: impl ToString) -> Self {
|
||||
Self {
|
||||
severity,
|
||||
span: None,
|
||||
message: message.to_string(),
|
||||
labels: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Attach a primary source span to this diagnostic.
|
||||
pub fn with_span(mut self, span: Span) -> Self {
|
||||
self.span = Some(span);
|
||||
self
|
||||
}
|
||||
|
||||
/// Attach a labelled secondary span.
|
||||
///
|
||||
/// Labels whose span matches the primary span exactly are merged into the
|
||||
/// primary underline as inline text. All other labels are rendered as
|
||||
/// separate snippets below the primary one.
|
||||
pub fn add_label(mut self, span: Span, message: impl ToString) -> Self {
|
||||
self.labels.push((span, message.to_string()));
|
||||
self
|
||||
}
|
||||
|
||||
/// Print this diagnostic to `stderr` and, if the severity is
|
||||
/// [`Severity::Critical`], terminate the process.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `file_name` – path shown in the `-->` location line.
|
||||
/// * `source` – full source text of the file, used to extract line/col
|
||||
/// information and to display the relevant source snippet.
|
||||
pub fn report(self, file_name: &Path, source: &str) {
|
||||
eprintln!("{}: {}", self.severity, self.message.bold());
|
||||
|
||||
let Some(primary_span) = self.span else {
|
||||
eprintln!(" {} {}", "-->".bright_black(), file_name.display());
|
||||
if self.severity == Severity::Critical {
|
||||
exit(-1);
|
||||
}
|
||||
return;
|
||||
};
|
||||
|
||||
// Guard: no source context available (e.g. critical error before any
|
||||
// file is read).
|
||||
if source.is_empty() || primary_span.start as usize >= source.len() {
|
||||
eprintln!(" {} {}", "-->".bright_black(), file_name.display());
|
||||
if self.severity == Severity::Critical {
|
||||
exit(-1);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
let (primary_line, primary_col) = get_line_col(source, primary_span.start);
|
||||
|
||||
// Partition labels: those on the *exact same span* as the primary are
|
||||
// merged into the primary underline as inline text. All others are
|
||||
// rendered as separate snippets below the primary.
|
||||
let (same_span, other_span): (Vec<_>, Vec<_>) = self
|
||||
.labels
|
||||
.into_iter()
|
||||
.partition(|(s, _)| *s == primary_span);
|
||||
|
||||
let primary_label: Option<String> = same_span.into_iter().next().map(|(_, m)| m);
|
||||
|
||||
// Gutter must be wide enough for the highest line number we'll print.
|
||||
let max_line = other_span
|
||||
.iter()
|
||||
.filter(|(s, _)| (s.start as usize) < source.len())
|
||||
.map(|(s, _)| get_line_col(source, s.start).0)
|
||||
.fold(primary_line, usize::max);
|
||||
let gutter_w = count_digits(max_line);
|
||||
let pad = " ".repeat(gutter_w);
|
||||
|
||||
// " --> file:line:col"
|
||||
eprintln!(
|
||||
"{} {}:{}:{}",
|
||||
format!("{pad} -->").bright_black(),
|
||||
file_name.display(),
|
||||
primary_line,
|
||||
primary_col,
|
||||
);
|
||||
eprintln!("{}", format!("{pad} |").bright_black());
|
||||
|
||||
// Primary snippet.
|
||||
render_snippet(
|
||||
source,
|
||||
primary_span,
|
||||
primary_label.as_deref(),
|
||||
gutter_w,
|
||||
self.severity,
|
||||
);
|
||||
|
||||
// Additional-context labels (different locations).
|
||||
for (span, msg) in &other_span {
|
||||
if (span.start as usize) < source.len() {
|
||||
render_snippet(source, *span, Some(msg.as_str()), gutter_w, Severity::Note);
|
||||
}
|
||||
}
|
||||
|
||||
eprintln!("{}", format!("{pad} |").bright_black());
|
||||
|
||||
if self.severity == Severity::Critical {
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Render a single source-line snippet: the numbered source line followed by
|
||||
/// a `^^^` underline. When `label` is `Some`, the text is appended after the
|
||||
/// carets on the same line.
|
||||
fn render_snippet(
|
||||
source: &str,
|
||||
span: Span,
|
||||
label: Option<&str>,
|
||||
gutter_w: usize,
|
||||
severity: Severity,
|
||||
) {
|
||||
let (line_num, _) = get_line_col(source, span.start);
|
||||
let (line_start, line_content) = get_line_content(source, span.start);
|
||||
|
||||
let pad = " ".repeat(gutter_w);
|
||||
let bar = format!("{}", "|".bright_black());
|
||||
let line_num_str = format!("{:>width$}", line_num, width = gutter_w);
|
||||
|
||||
// "N | source text"
|
||||
eprintln!("{} {bar} {line_content}", line_num_str.bright_black());
|
||||
|
||||
// Caret underline, clamped to the current line.
|
||||
let col_offset = span.start as usize - line_start;
|
||||
let line_end_byte = line_start + line_content.len();
|
||||
let underline_len = (span.end as usize)
|
||||
.min(line_end_byte)
|
||||
.saturating_sub(span.start as usize)
|
||||
.max(1);
|
||||
|
||||
let spaces = " ".repeat(col_offset);
|
||||
let carets = "^".repeat(underline_len);
|
||||
let colored_carets = paint_severity(&carets, severity);
|
||||
let label_text = label
|
||||
.map(|l| format!(" {}", paint_severity(l, severity)))
|
||||
.unwrap_or_default();
|
||||
|
||||
// " | ^^^label"
|
||||
eprintln!("{pad} {bar} {spaces}{colored_carets}{label_text}");
|
||||
}
|
||||
|
||||
/// Apply severity-appropriate ANSI colour to a string.
|
||||
fn paint_severity(s: &str, severity: Severity) -> String {
|
||||
match severity {
|
||||
Severity::Note => format!("{}", s.bold().bright_cyan()),
|
||||
Severity::Warning => format!("{}", s.bold().bright_yellow()),
|
||||
Severity::Error | Severity::Critical => format!("{}", s.bold().bright_red()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the number of decimal digits in `n` (minimum 1).
|
||||
fn count_digits(n: usize) -> usize {
|
||||
format!("{n}").len()
|
||||
}
|
||||
|
||||
/// Returns `(line_start_byte, line_content)` for the line that contains
|
||||
/// `position`. The returned content does *not* include the trailing newline.
|
||||
fn get_line_content(source: &str, position: u32) -> (usize, &str) {
|
||||
let pos = position as usize;
|
||||
let line_start = source[..pos].rfind('\n').map(|i| i + 1).unwrap_or(0);
|
||||
let rest = &source[line_start..];
|
||||
let line_len = rest.find('\n').unwrap_or(rest.len());
|
||||
(line_start, &rest[..line_len])
|
||||
}
|
||||
|
||||
/// Returns the 1-based `(line, column)` for a byte `position` within `source`.
|
||||
///
|
||||
/// Both line and column are counted from 1. The column is measured in Unicode
|
||||
/// scalar values (characters), not bytes.
|
||||
fn get_line_col(source: &str, position: u32) -> (usize, usize) {
|
||||
let prefix = &source[..position as usize];
|
||||
let line = prefix.bytes().filter(|&b| b == b'\n').count() + 1;
|
||||
let line_start_byte = prefix.rfind('\n').map(|i| i + 1).unwrap_or(0);
|
||||
let col = prefix[line_start_byte..].chars().count() + 1;
|
||||
(line, col)
|
||||
}
|
||||
309
src/lexer.rs
Normal file
309
src/lexer.rs
Normal file
@@ -0,0 +1,309 @@
|
||||
//! Lexer (tokeniser) that converts raw source text into a [`Token`] stream.
|
||||
//!
|
||||
//! [`Lexer`] implements [`Iterator<Item = Token>`] so it can be used directly
|
||||
//! in a `for` loop or with iterator adaptors such as `.peekable()`.
|
||||
//! Whitespace and `#`-line-comments are skipped automatically between tokens.
|
||||
//!
|
||||
//! # Character classes
|
||||
//! - **Identifiers / keywords** — start with a
|
||||
//! [XID_Start](https://unicode.org/reports/tr31/) character or `_`, continue
|
||||
//! with XID_Continue characters. Reserved words are mapped to their
|
||||
//! respective [`TokenKind`] variants; everything else becomes
|
||||
//! [`TokenKind::Identifier`].
|
||||
//! - **Integer literals** — decimal by default; `0x` / `0o` / `0b` prefixes
|
||||
//! select hexadecimal, octal, and binary respectively.
|
||||
//! - **String literals** — delimited by `"…"`; `\` escapes the next character.
|
||||
//! - **Operators and punctuation** — single- or double-character tokens
|
||||
//! dispatched via the `token!` macro with one character of lookahead.
|
||||
use std::{iter::Peekable, str::Chars};
|
||||
|
||||
use unicode_xid::UnicodeXID;
|
||||
|
||||
use crate::token::{Span, Token, TokenKind};
|
||||
|
||||
/// A lazy iterator over the [`Token`]s of a source string.
|
||||
///
|
||||
/// Tokens borrow their text slice directly from the original source, so the
|
||||
/// lexer lifetime `'src` must outlive any use of the produced tokens.
|
||||
///
|
||||
/// Construct with [`Lexer::new`] and consume via the [`Iterator`] impl or by
|
||||
/// passing it to the parser.
|
||||
pub struct Lexer<'src> {
|
||||
/// One-character look-ahead over the source characters.
|
||||
chars: Peekable<Chars<'src>>,
|
||||
/// The full source text, kept for slice extraction in [`make`](Self::make).
|
||||
source: &'src str,
|
||||
/// Current byte offset into `source`. Advanced by [`advance`](Self::advance).
|
||||
position: usize,
|
||||
}
|
||||
|
||||
impl<'src> Lexer<'src> {
|
||||
/// Creates a new [`Lexer`] positioned at the start of `source`.
|
||||
pub fn new(source: &'src str) -> Self {
|
||||
Self {
|
||||
chars: source.chars().peekable(),
|
||||
source,
|
||||
position: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Peek at the next character without consuming it.
|
||||
fn peek(&mut self) -> Option<char> {
|
||||
self.chars.peek().copied()
|
||||
}
|
||||
|
||||
/// Consume and return the next character, advancing [`position`](Self::position)
|
||||
/// by the character's UTF-8 byte length.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if called at the end of input. Always guard with
|
||||
/// [`peek`](Self::peek) first.
|
||||
fn advance(&mut self) -> char {
|
||||
let ch = self.chars.next().expect("failed to advance the lexer");
|
||||
self.position += ch.len_utf8();
|
||||
ch
|
||||
}
|
||||
|
||||
/// Advance while `condition` holds, stopping at the first character for
|
||||
/// which it returns `false` (or at end of input).
|
||||
fn advance_while(&mut self, condition: impl FnMut(char) -> bool + Copy) {
|
||||
while self.peek().is_some_and(condition) {
|
||||
self.advance();
|
||||
}
|
||||
}
|
||||
|
||||
/// Construct a [`Token`] spanning the byte range `[start, self.position)`.
|
||||
///
|
||||
/// The token's `text` is a zero-copy slice of the source string.
|
||||
fn make(&self, kind: TokenKind, start: usize) -> Token<'src> {
|
||||
Token {
|
||||
kind,
|
||||
span: Span::new(start as u32, self.position as u32),
|
||||
text: &self.source[start..self.position],
|
||||
}
|
||||
}
|
||||
|
||||
/// Skip any run of whitespace followed by a `#` line comment, repeating
|
||||
/// until neither is present.
|
||||
///
|
||||
/// Comments begin with `#` and extend to (but do not include) the
|
||||
/// following `\n`.
|
||||
fn skip_whitespace_and_comments(&mut self) {
|
||||
loop {
|
||||
self.advance_while(char::is_whitespace);
|
||||
|
||||
if self.peek() == Some('#') {
|
||||
self.advance_while(|ch| ch != '\n');
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Lex the next identifier or keyword token.
|
||||
///
|
||||
/// Assumes the current peek character satisfies `is_xid_start() || == '_'`.
|
||||
/// Consumes one XID_Start (or `_`) character followed by any number of
|
||||
/// XID_Continue characters, then matches the resulting slice against the
|
||||
/// keyword / type-keyword table.
|
||||
fn next_identifier(&mut self) -> TokenKind {
|
||||
let start = self.position;
|
||||
|
||||
self.advance();
|
||||
self.advance_while(|ch| ch.is_xid_continue());
|
||||
|
||||
match &self.source[start..self.position] {
|
||||
"and" => TokenKind::KwAnd,
|
||||
"or" => TokenKind::KwOr,
|
||||
"as" => TokenKind::KwAs,
|
||||
"let" => TokenKind::KwLet,
|
||||
|
||||
"u8" => TokenKind::TyU8,
|
||||
"u16" => TokenKind::TyU16,
|
||||
"u32" => TokenKind::TyU32,
|
||||
"u64" => TokenKind::TyU64,
|
||||
|
||||
"i8" => TokenKind::TyI8,
|
||||
"i16" => TokenKind::TyI16,
|
||||
"i32" => TokenKind::TyI32,
|
||||
"i64" => TokenKind::TyI64,
|
||||
|
||||
"bool" => TokenKind::TyBool,
|
||||
|
||||
"true" | "false" => TokenKind::LitBool,
|
||||
_ => TokenKind::Identifier,
|
||||
}
|
||||
}
|
||||
|
||||
/// Lex the next integer literal token.
|
||||
///
|
||||
/// Assumes the current peek character is an ASCII digit. Detects an
|
||||
/// optional radix prefix (`0x` → 16, `0o` → 8, `0b` → 2) then consumes
|
||||
/// all subsequent digits valid for that radix. Always returns
|
||||
/// [`TokenKind::LitInt`].
|
||||
fn next_number(&mut self) -> TokenKind {
|
||||
let radix = match self.advance() {
|
||||
'0' => match self.peek() {
|
||||
Some('x') => {
|
||||
self.advance();
|
||||
16
|
||||
}
|
||||
Some('o') => {
|
||||
self.advance();
|
||||
8
|
||||
}
|
||||
Some('b') => {
|
||||
self.advance();
|
||||
2
|
||||
}
|
||||
_ => 10,
|
||||
},
|
||||
_ => 10,
|
||||
};
|
||||
|
||||
self.advance_while(|ch| ch.is_digit(radix));
|
||||
|
||||
TokenKind::LitInt
|
||||
}
|
||||
|
||||
/// Lex the next string literal token.
|
||||
///
|
||||
/// Assumes the current peek character is `"`. Consumes characters until
|
||||
/// a closing (unescaped) `"` is found or input is exhausted. A `\`
|
||||
/// escapes the immediately following character, preventing it from being
|
||||
/// treated as a closing delimiter. Always returns [`TokenKind::LitString`].
|
||||
///
|
||||
/// Note: escape sequences are not validated here; that is left to a later
|
||||
/// compiler stage.
|
||||
fn next_string(&mut self) -> TokenKind {
|
||||
let mut escaped = false;
|
||||
|
||||
self.advance();
|
||||
|
||||
while let Some(ch) = self.peek() {
|
||||
if escaped {
|
||||
self.advance();
|
||||
escaped = false;
|
||||
} else if ch == '\\' {
|
||||
self.advance();
|
||||
escaped = true;
|
||||
} else if ch == '"' {
|
||||
self.advance();
|
||||
break;
|
||||
} else {
|
||||
self.advance();
|
||||
}
|
||||
}
|
||||
|
||||
TokenKind::LitString
|
||||
}
|
||||
}
|
||||
|
||||
impl<'src> Iterator for Lexer<'src> {
|
||||
type Item = Token<'src>;
|
||||
|
||||
/// Returns the next [`Token`], or `None` when the source is exhausted.
|
||||
///
|
||||
/// Leading whitespace and `#`-comments are skipped before each token.
|
||||
/// Multi-character operator tokens (`->`, `<<`, `<=`, …) are resolved with
|
||||
/// a single character of lookahead via the `token!` macro. Unrecognised
|
||||
/// characters are returned as [`TokenKind::Unknown`].
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.skip_whitespace_and_comments();
|
||||
|
||||
let start = self.position;
|
||||
|
||||
/// Builds and evaluates a [`TokenKind`] from the current position.
|
||||
///
|
||||
/// Three forms:
|
||||
/// - `token!($kind)` — single-character token: advance once, yield `$kind`.
|
||||
/// - `token!($c => $kind, … ; $default)` — multi-character token with
|
||||
/// lookahead: advance once (consuming the lead character), then
|
||||
/// check the next character against each `$c => $kind` arm in order,
|
||||
/// falling back to `$default` if none match.
|
||||
macro_rules! token {
|
||||
// Case 1: Simple token (no lookahead)
|
||||
($default:expr) => {{
|
||||
self.advance();
|
||||
$default
|
||||
}};
|
||||
|
||||
// Case 2: Multi-character lookahead entry point
|
||||
($($c:expr => $kind:expr),+ ; $default:expr) => {{
|
||||
self.advance();
|
||||
token!(@step $($c => $kind),+ ; $default)
|
||||
}};
|
||||
|
||||
// Internal Recursive step: More than one pair remains
|
||||
(@step $c:expr => $kind:expr, $($rest_c:expr => $rest_k:expr),+ ; $default:expr) => {
|
||||
if self.peek() == Some($c) {
|
||||
self.advance();
|
||||
$kind
|
||||
} else {
|
||||
token!(@step $($rest_c => $rest_k),+ ; $default)
|
||||
}
|
||||
};
|
||||
|
||||
// Internal Base case: Last pair in the lookahead chain
|
||||
(@step $c:expr => $kind:expr ; $default:expr) => {
|
||||
if self.peek() == Some($c) {
|
||||
self.advance();
|
||||
$kind
|
||||
} else {
|
||||
$default
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
let kind = match self.peek()? {
|
||||
ch if ch.is_xid_start() || ch == '_' => self.next_identifier(),
|
||||
'0'..='9' => self.next_number(),
|
||||
'"' => self.next_string(),
|
||||
|
||||
'+' => token!(TokenKind::Plus),
|
||||
'-' => token!(
|
||||
'>' => TokenKind::Arrow;
|
||||
TokenKind::Minus
|
||||
),
|
||||
'*' => token!(TokenKind::Star),
|
||||
'/' => token!(TokenKind::Slash),
|
||||
'%' => token!(TokenKind::Percent),
|
||||
'&' => token!(TokenKind::Amp),
|
||||
'|' => token!(TokenKind::Pipe),
|
||||
'^' => token!(TokenKind::Caret),
|
||||
'~' => token!(TokenKind::Tilde),
|
||||
'<' => token!(
|
||||
'<' => TokenKind::Shl,
|
||||
'=' => TokenKind::Le;
|
||||
TokenKind::Lt
|
||||
),
|
||||
'>' => token!(
|
||||
'>' => TokenKind::Shr,
|
||||
'=' => TokenKind::Ge;
|
||||
TokenKind::Gt
|
||||
),
|
||||
'!' => token!(
|
||||
'=' => TokenKind::Ne;
|
||||
TokenKind::Bang
|
||||
),
|
||||
'=' => token!(
|
||||
'=' => TokenKind::Eq;
|
||||
TokenKind::Assign
|
||||
),
|
||||
'.' => token!(TokenKind::Dot),
|
||||
',' => token!(TokenKind::Comma),
|
||||
':' => token!(TokenKind::Colon),
|
||||
';' => token!(TokenKind::Semi),
|
||||
'(' => token!(TokenKind::LParen),
|
||||
')' => token!(TokenKind::RParen),
|
||||
'[' => token!(TokenKind::LBracket),
|
||||
']' => token!(TokenKind::RBracket),
|
||||
'{' => token!(TokenKind::LCurly),
|
||||
'}' => token!(TokenKind::RCurly),
|
||||
|
||||
_ => token!(TokenKind::Unknown),
|
||||
};
|
||||
|
||||
Some(self.make(kind, start))
|
||||
}
|
||||
}
|
||||
38
src/main.rs
38
src/main.rs
@@ -1,9 +1,43 @@
|
||||
use crate::cli::parse_args;
|
||||
use std::fs;
|
||||
|
||||
use crate::{
|
||||
cli::{fatal, parse_args},
|
||||
parser::Parser,
|
||||
};
|
||||
|
||||
mod ast;
|
||||
mod cli;
|
||||
mod diagnostic;
|
||||
mod lexer;
|
||||
mod parser;
|
||||
mod token;
|
||||
|
||||
/// Entry point: parse CLI options, then read, parse, and report each file.
fn main() {
    let opts = parse_args();

    // Debug dump of the parsed CLI options.
    println!("{opts:#?}");
    for file in &opts.files {
        // NOTE(review): for this match to typecheck, `fatal` must diverge
        // (e.g. print and exit the process) — confirm its return type is `!`.
        let content = match fs::read_to_string(file) {
            Ok(content) => content,
            Err(error) => {
                fatal(format!(
                    "failed to read {}: {:?}",
                    file.display(),
                    error.kind()
                ));
            }
        };

        println!("-- {} --", file.display());
        let mut parser = Parser::new(&content);

        // Parse a single top-level statement; print the AST on success,
        // otherwise report the fatal diagnostic.
        match parser.parse_statement() {
            Ok(ast) => println!("{ast:#?}"),
            Err(diag) => diag.report(file, &content),
        }

        // Report any non-fatal diagnostics accumulated during recovery.
        parser
            .errors
            .into_iter()
            .for_each(|diag| diag.report(file, &content));
    }
}
|
||||
|
||||
558
src/parser.rs
Normal file
558
src/parser.rs
Normal file
@@ -0,0 +1,558 @@
|
||||
//! Recursive-descent / Pratt parser that converts a token stream into an AST.
|
||||
//!
|
||||
//! The entry points are [`Parser::parse_statement`], [`Parser::parse_type`],
|
||||
//! and [`Parser::parse_expression`].
|
||||
//! Errors are represented as [`Diagnostic`] values; the caller is responsible
|
||||
//! for reporting them.
|
||||
use std::iter::Peekable;
|
||||
|
||||
use crate::ast;
|
||||
use crate::diagnostic::{Diagnostic, Severity};
|
||||
use crate::lexer::Lexer;
|
||||
use crate::token::{Token, TokenKind};
|
||||
|
||||
/// Consumes the [`Token`] stream produced by the [`Lexer`] and constructs an
/// AST in the [`ast::Parsed`] phase.
///
/// The parser uses a single token of look-ahead (peek) for all decisions.
/// Expression parsing is implemented with the
/// [Pratt / top-down operator-precedence](https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html)
/// algorithm; binding-power tables are defined by [`infix_binding_power`],
/// [`prefix_binding_power`], and [`postfix_binding_power`].
pub struct Parser<'src> {
    /// Peekable token stream; the single token of look-ahead lives here.
    tokens: Peekable<Lexer<'src>>,
    /// Diagnostics accumulated during parsing. Non-fatal errors are pushed here
    /// so that the parser can attempt to continue and surface multiple issues
    /// in a single pass.
    pub errors: Vec<Diagnostic>,
}
|
||||
|
||||
impl<'src> Parser<'src> {
|
||||
/// Constructs a new [`Parser`] with the given source text.
|
||||
pub fn new(source: &'src str) -> Self {
|
||||
Self {
|
||||
tokens: Lexer::new(source).peekable(),
|
||||
errors: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
    /// Peek at the next [`Token`] without consuming it.
    ///
    /// Tokens are `Copy`, so the peeked token is returned by value.
    fn peek(&mut self) -> Option<Token<'src>> {
        self.tokens.peek().copied()
    }
|
||||
|
||||
/// Peek at the next [`Token`], returning an [`Err`] diagnostic if the
|
||||
/// token stream is exhausted.
|
||||
fn peek_no_eof(&mut self) -> Result<Token<'src>, Diagnostic> {
|
||||
self.peek()
|
||||
.ok_or_else(|| Diagnostic::new(Severity::Error, "unexpected end of input"))
|
||||
}
|
||||
|
||||
/// Returns `true` if the next token has the given [`TokenKind`].
|
||||
fn is_peek(&mut self, kind: TokenKind) -> bool {
|
||||
self.peek().map_or(false, |tok| tok.is(kind))
|
||||
}
|
||||
|
||||
    /// Returns `true` if the token stream is exhausted.
    ///
    /// Takes `&mut self` because peeking may pull one token from the lexer.
    fn is_at_eof(&mut self) -> bool {
        self.peek().is_none()
    }
|
||||
|
||||
    /// Consumes and returns the next [`Token`].
    ///
    /// # Panics
    /// Panics if called at the end of input. Always check [`is_at_eof`](Self::is_at_eof)
    /// or use [`peek_no_eof`](Self::peek_no_eof) / [`expect`](Self::expect) in
    /// production code paths.
    fn advance(&mut self) -> Token<'src> {
        // Callers are expected to have peeked first, so running out of
        // tokens here is a parser bug — hence the expect.
        self.tokens.next().expect("failed to advance the parser")
    }
|
||||
|
||||
/// Consumes and returns the next [`Token`] if it matches `kind`; otherwise
|
||||
/// returns an [`Err`] diagnostic that points at the offending token.
|
||||
fn expect(&mut self, kind: TokenKind) -> Result<Token<'src>, Diagnostic> {
|
||||
match self.peek() {
|
||||
Some(tok) if tok.is(kind) => Ok(self.advance()),
|
||||
Some(tok) => Err(Diagnostic::new(Severity::Error, "unexpected token found")
|
||||
.with_span(tok.span)
|
||||
.add_label(
|
||||
tok.span,
|
||||
format!("expected {} but found {} instead", kind, tok.kind),
|
||||
)),
|
||||
None => Err(Diagnostic::new(Severity::Error, "unexpected end of input")),
|
||||
}
|
||||
}
|
||||
|
||||
/// Error-recovery helper: skips tokens until a statement boundary is
|
||||
/// reached so that subsequent statements can still be parsed cleanly.
|
||||
///
|
||||
/// Stops *after* consuming a `;`, or *before* consuming a `}`. This keeps
|
||||
/// nested blocks intact when recovering inside function bodies.
|
||||
fn synchronize(&mut self) {
|
||||
while let Some(peek) = self.peek() {
|
||||
match peek.kind {
|
||||
// Consume the `;` and stop
|
||||
TokenKind::Semi => {
|
||||
self.advance();
|
||||
break;
|
||||
}
|
||||
|
||||
// Stop before these
|
||||
TokenKind::RCurly => break,
|
||||
|
||||
_ => _ = self.advance(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses the next statement.
|
||||
///
|
||||
/// Dispatches to the appropriate specialised parser based on the leading
|
||||
/// token:
|
||||
/// - `let` → [`parse_let_statement`](Self::parse_let_statement)
|
||||
/// - `{` → [`parse_compound_statement`](Self::parse_compound_statement)
|
||||
/// - anything else → an expression followed by a mandatory `;`
|
||||
pub fn parse_statement(&mut self) -> Result<ast::ParsedStatement, Diagnostic> {
|
||||
let peek = self.peek_no_eof()?;
|
||||
|
||||
match peek.kind {
|
||||
TokenKind::KwLet => self.parse_let_statement(),
|
||||
TokenKind::LCurly => self.parse_compound_statement(),
|
||||
|
||||
_ => {
|
||||
let expr = self.parse_expression(0)?;
|
||||
let semi_token = self.expect(TokenKind::Semi)?;
|
||||
let span = expr.span.extend(semi_token.span);
|
||||
|
||||
Ok(ast::ParsedStatement {
|
||||
kind: ast::StatementKind::Expr(expr),
|
||||
span,
|
||||
extra: (),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses a `let` binding statement: `let <name>[: <type>] [= <expr>];`.
|
||||
///
|
||||
/// Both the type annotation and the initialiser are optional. The
|
||||
/// statement span runs from the `let` keyword through to the closing `;`.
|
||||
fn parse_let_statement(&mut self) -> Result<ast::ParsedStatement, Diagnostic> {
|
||||
let let_token = self.expect(TokenKind::KwLet)?;
|
||||
|
||||
let (name, name_span) = {
|
||||
let ident_token = self.expect(TokenKind::Identifier)?;
|
||||
(ident_token.text.to_string(), ident_token.span)
|
||||
};
|
||||
|
||||
let ty = if self.is_peek(TokenKind::Colon) {
|
||||
self.advance();
|
||||
Some(self.parse_type()?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let value = if self.is_peek(TokenKind::Assign) {
|
||||
self.advance();
|
||||
Some(self.parse_expression(0)?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let semi_token = self.expect(TokenKind::Semi)?;
|
||||
|
||||
Ok(ast::ParsedStatement {
|
||||
kind: ast::StatementKind::Let {
|
||||
name,
|
||||
name_span,
|
||||
ty,
|
||||
value,
|
||||
},
|
||||
span: let_token.span.extend(semi_token.span),
|
||||
extra: (),
|
||||
})
|
||||
}
|
||||
|
||||
/// Parses a braced block of statements: `{ <stmt>* }`.
|
||||
///
|
||||
/// Each inner statement is parsed with [`parse_statement`](Self::parse_statement).
|
||||
/// If a statement fails, the diagnostic is pushed onto [`errors`](Parser::errors)
|
||||
/// and [`synchronize`](Self::synchronize) is called so that parsing can
|
||||
/// continue with the next statement. The block span runs from `{` to `}`.
|
||||
fn parse_compound_statement(&mut self) -> Result<ast::ParsedStatement, Diagnostic> {
|
||||
let lcurly_token = self.expect(TokenKind::LCurly)?;
|
||||
let mut inner = Vec::new();
|
||||
|
||||
while !self.is_at_eof() && !self.is_peek(TokenKind::RCurly) {
|
||||
match self.parse_statement() {
|
||||
Ok(stmt) => inner.push(stmt),
|
||||
Err(diag) => {
|
||||
self.errors.push(diag);
|
||||
self.synchronize();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let rcurly_token = self.expect(TokenKind::RCurly)?;
|
||||
let span = lcurly_token.span.extend(rcurly_token.span);
|
||||
|
||||
Ok(ast::ParsedStatement {
|
||||
kind: ast::StatementKind::Compound { inner },
|
||||
span,
|
||||
extra: (),
|
||||
})
|
||||
}
|
||||
|
||||
/// Parses a type annotation, e.g. `u8`, `i64`, `bool`, or a user-defined
|
||||
/// named type.
|
||||
///
|
||||
/// Returns an [`Err`] diagnostic if the next token is not a valid type.
|
||||
pub fn parse_type(&mut self) -> Result<ast::ParsedType, Diagnostic> {
|
||||
let peek = self.peek_no_eof()?;
|
||||
|
||||
let kind = match peek.kind {
|
||||
TokenKind::TyU8 => ast::TypeKind::U8,
|
||||
TokenKind::TyU16 => ast::TypeKind::U16,
|
||||
TokenKind::TyU32 => ast::TypeKind::U32,
|
||||
TokenKind::TyU64 => ast::TypeKind::U64,
|
||||
|
||||
TokenKind::TyI8 => ast::TypeKind::I8,
|
||||
TokenKind::TyI16 => ast::TypeKind::I16,
|
||||
TokenKind::TyI32 => ast::TypeKind::I32,
|
||||
TokenKind::TyI64 => ast::TypeKind::I64,
|
||||
|
||||
TokenKind::TyBool => ast::TypeKind::Bool,
|
||||
|
||||
TokenKind::Identifier => ast::TypeKind::Named(peek.text.to_string()),
|
||||
|
||||
_ => {
|
||||
return Err(
|
||||
Diagnostic::new(Severity::Error, "expected a type").with_span(peek.span)
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
let span = self.advance().span;
|
||||
|
||||
Ok(ast::ParsedType {
|
||||
kind,
|
||||
span,
|
||||
extra: (),
|
||||
})
|
||||
}
|
||||
|
||||
    /// Parses an expression using the Pratt (top-down operator-precedence)
    /// algorithm.
    ///
    /// `min_bp` is the minimum *left* binding power the next infix/postfix
    /// operator must have to be incorporated into the current expression. Pass
    /// `0` to parse a full expression with no restrictions.
    ///
    /// The precedence hierarchy (low → high) is:
    /// - assignment (`=`)
    /// - logical `or` / `and`
    /// - bitwise `|` / `^` / `&`
    /// - equality (`==`, `!=`) and comparison (`<`, `<=`, `>`, `>=`)
    /// - addition / subtraction
    /// - shifts (`<<`, `>>`)
    /// - multiplication / division / remainder
    /// - member access (`.`)
    /// - postfix: call `()`, index `[]`, cast `as`
    /// - prefix: `-`, `&`, `~`, `*`, `!`
    pub fn parse_expression(&mut self, min_bp: u8) -> Result<ast::ParsedExpression, Diagnostic> {
        let peek_token = self.peek_no_eof()?;

        // Left-hand side: either a prefix operator applied to a
        // sub-expression, or a primary expression.
        let mut left = if let Some((right_bp, op)) = prefix_binding_power(peek_token.kind) {
            let op_span = self.advance().span;
            let operand = Box::new(self.parse_expression(right_bp)?);

            let span = op_span.extend(operand.span);

            ast::ParsedExpression {
                kind: ast::ExpressionKind::Unary {
                    op,
                    op_span,
                    operand,
                },
                span,
                extra: (),
            }
        } else {
            self.parse_primary_expression()?
        };

        // Fold postfix and infix operators into `left` while their binding
        // power is at least `min_bp`.
        while let Some(peek_token) = self.peek() {
            // Postfix operators are checked first so they always apply to
            // the nearest sub-expression.
            if let Some(left_bp) = postfix_binding_power(peek_token.kind) {
                if left_bp < min_bp {
                    break;
                }

                left = match peek_token.kind {
                    TokenKind::LParen => self.parse_call_expr(left)?,
                    TokenKind::LBracket => self.parse_index_expr(left)?,
                    TokenKind::KwAs => self.parse_cast_expr(left)?,

                    // `postfix_binding_power` only returns `Some` for the
                    // three kinds handled above.
                    _ => unreachable!(),
                };

                continue;
            }

            if let Some((left_bp, right_bp, op)) = infix_binding_power(peek_token.kind) {
                if left_bp < min_bp {
                    break;
                }

                let op_span = self.advance().span;

                // Recurse with the operator's *right* binding power — this
                // is what implements associativity.
                let right = self.parse_expression(right_bp)?;
                let span = left.span.extend(right.span);

                left = ast::ParsedExpression {
                    kind: ast::ExpressionKind::Binary {
                        op,
                        op_span,
                        left: Box::new(left),
                        right: Box::new(right),
                    },
                    span,
                    extra: (),
                };

                continue;
            }

            // The next token is not an operator: the expression ends here.
            break;
        }

        Ok(left)
    }
|
||||
|
||||
/// Parses a primary (non-operator) expression: an identifier, integer
|
||||
/// literal, boolean literal, or a parenthesised expression.
|
||||
///
|
||||
/// Integer literals support `0x` (hex), `0o` (octal), and `0b` (binary)
|
||||
/// prefixes in addition to plain decimal.
|
||||
fn parse_primary_expression(&mut self) -> Result<ast::ParsedExpression, Diagnostic> {
|
||||
let peek_token = self.peek_no_eof()?;
|
||||
|
||||
match peek_token.kind {
|
||||
TokenKind::Identifier => {
|
||||
let name = self.advance().text.to_string();
|
||||
|
||||
Ok(ast::ParsedExpression {
|
||||
kind: ast::ExpressionKind::Identifier(name),
|
||||
span: peek_token.span,
|
||||
extra: (),
|
||||
})
|
||||
}
|
||||
TokenKind::LitInt => {
|
||||
let tok = self.advance();
|
||||
|
||||
let (radix, src) = [("0x", 16), ("0o", 8), ("0b", 2)]
|
||||
.into_iter()
|
||||
.find_map(|(prefix, radix)| {
|
||||
tok.text.strip_prefix(prefix).map(|text| (radix, text))
|
||||
})
|
||||
.unwrap_or((10, tok.text));
|
||||
|
||||
let value = u64::from_str_radix(src, radix).map_err(|_| {
|
||||
Diagnostic::new(Severity::Error, "invalid integer literal")
|
||||
.with_span(tok.span)
|
||||
.add_label(tok.span, "this is an invalid integer literal")
|
||||
})?;
|
||||
|
||||
Ok(ast::ParsedExpression {
|
||||
kind: ast::ExpressionKind::LitInteger(value),
|
||||
span: tok.span,
|
||||
extra: (),
|
||||
})
|
||||
}
|
||||
TokenKind::LitBool => {
|
||||
let value = self.advance().text == "true";
|
||||
|
||||
Ok(ast::ParsedExpression {
|
||||
kind: ast::ExpressionKind::LitBool(value),
|
||||
span: peek_token.span,
|
||||
extra: (),
|
||||
})
|
||||
}
|
||||
TokenKind::LParen => {
|
||||
let open_paren = self.advance();
|
||||
let inner = self.parse_expression(0)?;
|
||||
let close_paren = self.expect(TokenKind::RParen)?;
|
||||
|
||||
Ok(ast::ParsedExpression {
|
||||
kind: inner.kind,
|
||||
span: open_paren.span.extend(close_paren.span),
|
||||
extra: (),
|
||||
})
|
||||
}
|
||||
_ => Err(Diagnostic::new(
|
||||
Severity::Error,
|
||||
format!(
|
||||
"expected one of {}, {} or {} but found {} instead",
|
||||
TokenKind::Identifier,
|
||||
TokenKind::LitInt,
|
||||
TokenKind::LitBool,
|
||||
peek_token.kind
|
||||
),
|
||||
)
|
||||
.with_span(peek_token.span)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses a function-call expression `func(arg, …)`.
|
||||
///
|
||||
/// The opening `(` is consumed here; `func` is the already-parsed callee
|
||||
/// expression passed in from the Pratt loop.
|
||||
fn parse_call_expr(
|
||||
&mut self,
|
||||
func: ast::ParsedExpression,
|
||||
) -> Result<ast::ParsedExpression, Diagnostic> {
|
||||
self.expect(TokenKind::LParen)?;
|
||||
let mut args = Vec::new();
|
||||
|
||||
while !self.is_at_eof() && !self.is_peek(TokenKind::RParen) {
|
||||
if !args.is_empty() {
|
||||
self.expect(TokenKind::Comma)?;
|
||||
}
|
||||
|
||||
args.push(self.parse_expression(0)?);
|
||||
}
|
||||
|
||||
let rparen_token = self.expect(TokenKind::RParen)?;
|
||||
let span = func.span.extend(rparen_token.span);
|
||||
|
||||
Ok(ast::ParsedExpression {
|
||||
kind: ast::ExpressionKind::Call {
|
||||
func: Box::new(func),
|
||||
args,
|
||||
},
|
||||
span,
|
||||
extra: (),
|
||||
})
|
||||
}
|
||||
|
||||
/// Parses an index expression `expr[index]`.
|
||||
///
|
||||
/// The opening `[` is consumed here; `expr` is the already-parsed
|
||||
/// collection expression passed in from the Pratt loop.
|
||||
fn parse_index_expr(
|
||||
&mut self,
|
||||
expr: ast::ParsedExpression,
|
||||
) -> Result<ast::ParsedExpression, Diagnostic> {
|
||||
self.expect(TokenKind::LBracket)?;
|
||||
|
||||
let index = self.parse_expression(0)?;
|
||||
|
||||
let rbracket_token = self.expect(TokenKind::RBracket)?;
|
||||
let span = expr.span.extend(rbracket_token.span);
|
||||
|
||||
Ok(ast::ParsedExpression {
|
||||
kind: ast::ExpressionKind::Index {
|
||||
expr: Box::new(expr),
|
||||
index: Box::new(index),
|
||||
},
|
||||
span,
|
||||
extra: (),
|
||||
})
|
||||
}
|
||||
|
||||
/// Parses a cast expression `expr as Type`.
|
||||
///
|
||||
/// The `as` keyword is consumed here; `expr` is the already-parsed value
|
||||
/// expression passed in from the Pratt loop.
|
||||
fn parse_cast_expr(
|
||||
&mut self,
|
||||
expr: ast::ParsedExpression,
|
||||
) -> Result<ast::ParsedExpression, Diagnostic> {
|
||||
self.expect(TokenKind::KwAs)?;
|
||||
|
||||
let ty = self.parse_type()?;
|
||||
let span = expr.span.extend(ty.span);
|
||||
|
||||
Ok(ast::ParsedExpression {
|
||||
kind: ast::ExpressionKind::Cast {
|
||||
expr: Box::new(expr),
|
||||
ty: Box::new(ty),
|
||||
},
|
||||
span,
|
||||
extra: (),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `(left_bp, right_bp, op)` for infix operators, or `None` if `kind`
/// is not an infix operator.
///
/// The two binding-power values implement associativity: `right_bp = left_bp + 1`
/// gives left-associativity, while equal values (`right_bp == left_bp`) give
/// right-associativity (currently used for `=`) — the Pratt loop only breaks
/// when the next operator's `left_bp` is strictly below `min_bp`.
fn infix_binding_power(kind: TokenKind) -> Option<(u8, u8, ast::BinaryOp)> {
    Some(match kind {
        // Right-associative: equal binding powers.
        TokenKind::Assign => (2, 2, ast::BinaryOp::Assign),

        TokenKind::KwOr => (10, 11, ast::BinaryOp::Or),
        TokenKind::KwAnd => (20, 21, ast::BinaryOp::And),

        TokenKind::Pipe => (30, 31, ast::BinaryOp::BitOr),
        TokenKind::Caret => (40, 41, ast::BinaryOp::BitXor),
        TokenKind::Amp => (50, 51, ast::BinaryOp::BitAnd),

        TokenKind::Eq => (55, 56, ast::BinaryOp::Eq),
        TokenKind::Ne => (55, 56, ast::BinaryOp::Ne),

        TokenKind::Lt => (57, 58, ast::BinaryOp::Lt),
        TokenKind::Le => (57, 58, ast::BinaryOp::Le),
        TokenKind::Gt => (57, 58, ast::BinaryOp::Gt),
        TokenKind::Ge => (57, 58, ast::BinaryOp::Ge),

        TokenKind::Plus => (60, 61, ast::BinaryOp::Add),
        TokenKind::Minus => (60, 61, ast::BinaryOp::Sub),

        TokenKind::Shl => (65, 66, ast::BinaryOp::BitShl),
        TokenKind::Shr => (65, 66, ast::BinaryOp::BitShr),

        TokenKind::Star => (70, 71, ast::BinaryOp::Mul),
        TokenKind::Slash => (70, 71, ast::BinaryOp::Div),
        TokenKind::Percent => (70, 71, ast::BinaryOp::Rem),

        TokenKind::Dot => (100, 101, ast::BinaryOp::Dot),

        _ => return None,
    })
}
|
||||
|
||||
/// Returns `(right_bp, op)` for prefix operators, or `None` if `kind` is not
|
||||
/// a prefix operator.
|
||||
///
|
||||
/// All prefix operators currently share the same binding power (`80`), giving
|
||||
/// them higher precedence than any binary operator.
|
||||
fn prefix_binding_power(kind: TokenKind) -> Option<(u8, ast::UnaryOp)> {
|
||||
Some(match kind {
|
||||
TokenKind::Minus => (80, ast::UnaryOp::Neg),
|
||||
TokenKind::Amp => (80, ast::UnaryOp::AddrOf),
|
||||
TokenKind::Tilde => (80, ast::UnaryOp::BitNot),
|
||||
TokenKind::Star => (80, ast::UnaryOp::Deref),
|
||||
TokenKind::Bang => (80, ast::UnaryOp::Not),
|
||||
|
||||
_ => return None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns the *left* binding power for postfix operators, or `None` if `kind`
|
||||
/// is not a postfix operator.
|
||||
///
|
||||
/// Postfix operators (`()`, `[]`, `as`) bind tighter than all binary operators
|
||||
/// but are checked before prefix operators in the Pratt loop so they always
|
||||
/// apply to the nearest sub-expression.
|
||||
fn postfix_binding_power(kind: TokenKind) -> Option<u8> {
|
||||
Some(match kind {
|
||||
TokenKind::LParen => 100,
|
||||
TokenKind::LBracket => 100,
|
||||
TokenKind::KwAs => 90,
|
||||
|
||||
_ => return None,
|
||||
})
|
||||
}
|
||||
179
src/token.rs
Normal file
179
src/token.rs
Normal file
@@ -0,0 +1,179 @@
|
||||
//! Token definitions used by the [`Lexer`](crate::lexer::Lexer) and
|
||||
//! [`Parser`](crate::parser::Parser).
|
||||
//!
|
||||
//! The two core types are:
|
||||
//! - [`Span`] — a half-open byte range that marks a location in source text.
|
||||
//! - [`Token`] — a classified slice of source text together with its span.
|
||||
//!
|
||||
//! [`TokenKind`] enumerates every token variant; its [`Display`](std::fmt::Display)
|
||||
//! impl produces the human-readable representation used in diagnostics.
|
||||
use std::fmt;
|
||||
|
||||
/// A half-open byte range `[start, end)` that marks a location in the source
/// string.
///
/// Offsets are stored as [`u32`], which caps supported source files at
/// 4 GiB — far more than any practical source file needs.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Span {
    /// Inclusive start byte offset.
    pub start: u32,
    /// Exclusive end byte offset.
    pub end: u32,
}

impl Span {
    /// Creates a new span covering `[start, end)`.
    pub const fn new(start: u32, end: u32) -> Self {
        Self { start, end }
    }

    /// Returns the length of the span in bytes.
    ///
    /// An inverted span (start past end) reports length `0` rather than
    /// wrapping around.
    pub fn len(&self) -> u32 {
        if self.end > self.start {
            self.end - self.start
        } else {
            0
        }
    }

    /// Returns `true` if the span covers zero bytes (`start == end`).
    pub fn is_empty(&self) -> bool {
        self.start == self.end
    }

    /// Returns the smallest span that covers both `self` and `other` — the
    /// union of the two ranges, handy for deriving a parent node's span from
    /// its children.
    pub fn extend(self, other: Self) -> Self {
        Self::new(self.start.min(other.start), self.end.max(other.end))
    }
}

impl fmt::Display for Span {
    /// Renders the span as `start..end`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { start, end } = self;
        write!(f, "{start}..{end}")
    }
}
|
||||
|
||||
/// Simultaneously defines the [`TokenKind`] enum and its [`fmt::Display`] impl.
///
/// Each arm maps a variant name to the human-readable string used in
/// diagnostics (e.g. `` `+` ``, `identifier`). Keeping both in one macro
/// guarantees the enum and its display strings can never drift apart.
macro_rules! define_tokens {
    ($($name:ident => $repr:literal),* $(,)?) => {
        #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
        pub enum TokenKind {
            $($name),*
        }

        impl fmt::Display for TokenKind {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                f.write_str(match self {
                    $(TokenKind::$name => $repr),*
                })
            }
        }
    };
}

// The single source of truth for every token kind and its diagnostic name.
define_tokens! {
    // -- Identifier --
    Identifier => "identifier",

    // -- Literals --
    LitInt => "integer literal",
    LitBool => "boolean literal",
    LitString => "string literal",

    // -- Keywords --
    KwAnd => "`and`",
    KwOr => "`or`",
    KwAs => "`as`",
    KwLet => "`let`",

    // -- Type Keywords --
    TyU8 => "`u8`",
    TyU16 => "`u16`",
    TyU32 => "`u32`",
    TyU64 => "`u64`",
    TyI8 => "`i8`",
    TyI16 => "`i16`",
    TyI32 => "`i32`",
    TyI64 => "`i64`",
    TyBool => "`bool`",

    // -- Arithmetic Operators --
    Plus => "`+`",
    Minus => "`-`",
    Star => "`*`",
    Slash => "`/`",
    Percent => "`%`",

    // -- Bitwise / Logical Operators --
    Amp => "`&`",
    Pipe => "`|`",
    Caret => "`^`",
    Tilde => "`~`",
    Shl => "`<<`",
    Shr => "`>>`",
    Bang => "`!`",

    // -- Comparison Operators --
    Eq => "`==`",
    Ne => "`!=`",
    Lt => "`<`",
    Le => "`<=`",
    Gt => "`>`",
    Ge => "`>=`",

    // -- Punctuation --
    Assign => "`=`",
    Arrow => "`->`",
    Dot => "`.`",
    Colon => "`:`",
    Comma => "`,`",
    Semi => "`;`",

    // -- Delimiters --
    LParen => "`(`",
    RParen => "`)`",
    LBracket => "`[`",
    RBracket => "`]`",
    LCurly => "`{`",
    RCurly => "`}`",

    // -- Special --
    Unknown => "unknown character"
}
|
||||
|
||||
/// The smallest contiguous unit of source text, as produced by the
/// [`Lexer`](crate::lexer::Lexer).
///
/// A token borrows its [`text`](Token::text) slice directly from the original
/// source string, so the lifetime `'src` ties every token to that source.
/// Tokens are small and `Copy`, so they are passed around by value.
#[derive(Debug, Clone, Copy)]
pub struct Token<'src> {
    /// The syntactic category of this token.
    pub kind: TokenKind,
    /// The byte range in the source string where this token appears.
    pub span: Span,
    /// The raw source text of this token (a zero-copy slice).
    pub text: &'src str,
}
|
||||
|
||||
impl<'src> Token<'src> {
|
||||
/// Returns `true` if this token has the given [`TokenKind`].
|
||||
pub fn is(&self, kind: TokenKind) -> bool {
|
||||
self.kind == kind
|
||||
}
|
||||
}
|
||||
|
||||
impl<'src> fmt::Display for Token<'src> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "{:?} `{}` @ {}", self.kind, self.text, self.span)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user