// Contents of `src/frontend/token.rs` as introduced by commit
// 51a4000f1fc654d7bdf4a2a4d2f2d3ebac6f3061
// ("feat: add token and span definitions", Jooris Hadeler, Mon, 20 Apr 2026 19:00:36 +0200).
//
// The same commit also:
//   * creates `src/frontend/mod.rs` containing `pub mod token;`
//   * adds `pub mod frontend;` at the top of `src/main.rs`
//   * ticks "Define token enum (int literal, bool literal, ident, keywords, operators,
//     punctuation)" under "Phase 1 - Lexer" in `PLAN.md`

use std::fmt::Display;

/// A half-open interval `[start, end)` representing a location in the source text.
///
/// The type itself does not fix the unit of the positions (bytes vs. characters); that is
/// up to whoever produces the spans.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Span {
    /// Inclusive lower bound of the interval.
    pub start: usize,
    /// Exclusive upper bound of the interval.
    pub end: usize,
}

impl Span {
    /// Create a new [Span] from the start and end positions.
    ///
    /// # Panics
    ///
    /// When debug assertions are enabled, panics if `start > end`. Without debug
    /// assertions no check is performed and the span is constructed as given.
    #[must_use]
    pub const fn new(start: usize, end: usize) -> Self {
        debug_assert!(start <= end);

        Self { start, end }
    }

    /// Create a new extended [Span] which contains both `self` and `other`.
    ///
    /// The result runs from the smaller of the two `start`s to the larger of the two
    /// `end`s, so any gap between the two spans is covered as well.
    #[must_use]
    pub const fn join(self, other: Self) -> Self {
        // Trait methods such as `Ord::min`/`Ord::max` cannot be called from a `const fn`
        // on stable Rust, so the comparisons are spelled out by hand.
        let start = if other.start < self.start {
            other.start
        } else {
            self.start
        };
        let end = if other.end > self.end {
            other.end
        } else {
            self.end
        };

        Self::new(start, end)
    }
}

/// A fundamental, categorized unit of source code produced during lexical analysis.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Token<'src> {
    /// The category this token belongs to.
    pub kind: TokenKind,
    /// The source text of the token, borrowed for `'src`.
    pub text: &'src str,
    /// The location of the token in the source text.
    pub span: Span,
}

impl<'src> Token<'src> {
    /// Create a new [Token] from kind, source text and [Span].
    ///
    /// The three values are stored as given; no consistency check between `text` and
    /// `span` is performed.
    #[must_use]
    pub const fn new(kind: TokenKind, text: &'src str, span: Span) -> Self {
        Self { kind, text, span }
    }
}

/// An enum representing all possible kinds of [Token]s.
///
/// The [Display] implementation renders each kind as a short human-readable phrase.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenKind {
    /// Not a valid token.
    // NOTE(review): presumably emitted by the lexer for unrecognized input - confirm once
    // the scanner exists.
    Invalid,

    // Identifiers and literals
    Identifier,
    IntegerLit,
    BooleanLit,

    // Keywords
    Fn,
    Return,

    // Types
    I8,
    I16,
    I32,
    I64,
    U8,
    U16,
    U32,
    U64,
    Bool,

    // Operators
    Plus,
    Minus,
    Star,
    Slash,
    Percent,

    // Punctuation
    Dot,
    Comma,
    Colon,
    Semicolon,
    /// The two-character sequence `->`.
    Arrow,

    // Delimiters (parentheses, braces, brackets)
    LParen,
    RParen,
    LBrace,
    RBrace,
    LBracket,
    RBracket,
}

impl Display for TokenKind {
    /// Write a description of the token kind.
    ///
    /// Kinds with a fixed spelling are written as that spelling in backticks (for example
    /// `` `fn` ``); the remaining kinds are written as a noun phrase with an article (for
    /// example `an identifier`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            TokenKind::Invalid => "an invalid token",
            TokenKind::Identifier => "an identifier",
            TokenKind::IntegerLit => "an integer",
            TokenKind::BooleanLit => "a boolean",
            TokenKind::Fn => "`fn`",
            TokenKind::Return => "`return`",
            TokenKind::I8 => "`i8`",
            TokenKind::I16 => "`i16`",
            TokenKind::I32 => "`i32`",
            TokenKind::I64 => "`i64`",
            TokenKind::U8 => "`u8`",
            TokenKind::U16 => "`u16`",
            TokenKind::U32 => "`u32`",
            TokenKind::U64 => "`u64`",
            TokenKind::Bool => "`bool`",
            TokenKind::Plus => "`+`",
            TokenKind::Minus => "`-`",
            TokenKind::Star => "`*`",
            TokenKind::Slash => "`/`",
            TokenKind::Percent => "`%`",
            TokenKind::Dot => "`.`",
            TokenKind::Comma => "`,`",
            TokenKind::Colon => "`:`",
            TokenKind::Semicolon => "`;`",
            TokenKind::Arrow => "`->`",
            TokenKind::LParen => "`(`",
            TokenKind::RParen => "`)`",
            TokenKind::LBrace => "`{`",
            TokenKind::RBrace => "`}`",
            TokenKind::LBracket => "`[`",
            TokenKind::RBracket => "`]`",
        })
    }
}