feat: add token and span definitions

This commit is contained in:
2026-04-20 19:00:36 +02:00
parent 46028f9072
commit 51a4000f1f
4 changed files with 127 additions and 1 deletions
+1 -1
View File
@@ -7,7 +7,7 @@ A Rust-flavored, C-targeting language - built pipeline-first.
## Phase 1 - Lexer
- [ ] Define token enum (int literal, bool literal, ident, keywords, operators, punctuation)
- [x] Define token enum (int literal, bool literal, ident, keywords, operators, punctuation)
- [ ] Implement character-by-character scanner loop
- [ ] Handle whitespace & single-line comments (`//`)
- [ ] Produce source spans (file, line, col) on every token
+1
View File
@@ -0,0 +1 @@
pub mod token;
+123
View File
@@ -0,0 +1,123 @@
use std::fmt::Display;
/// A half-open interval `[start, end)` representing a location in the source text.
///
/// `start` is the first position covered by the span and `end` is the position one
/// past the last covered one, so an empty span has `start == end`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Span {
    /// Inclusive lower bound of the interval.
    pub start: usize,
    /// Exclusive upper bound of the interval.
    pub end: usize,
}

impl Span {
    /// Create a new [Span] from the start and end positions.
    ///
    /// # Panics
    ///
    /// In builds with debug assertions enabled, panics if `start > end`.
    /// Builds without debug assertions perform no check.
    #[must_use]
    pub const fn new(start: usize, end: usize) -> Self {
        debug_assert!(start <= end);
        Self { start, end }
    }

    /// Create a new extended [Span] which contains both `self` and `other`.
    ///
    /// The result runs from the smaller of the two `start`s to the larger of the two
    /// `end`s, so any gap between the two spans is covered as well.
    #[must_use]
    pub const fn join(self, other: Self) -> Self {
        // `Ord::min` / `Ord::max` cannot be called from a `const fn`, so the bounds
        // are picked with plain comparisons to keep `join` usable in const contexts.
        let start = if self.start <= other.start {
            self.start
        } else {
            other.start
        };
        let end = if self.end >= other.end {
            self.end
        } else {
            other.end
        };
        Self::new(start, end)
    }
}
/// One categorized unit of source code, as produced during lexical analysis.
///
/// A token bundles what it is (`kind`), the source text it carries (`text`) and
/// where it is located (`span`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Token<'src> {
    /// Category of this token.
    pub kind: TokenKind,
    /// Source text of this token, borrowed for the `'src` lifetime.
    pub text: &'src str,
    /// Location of this token in the source text.
    pub span: Span,
}

impl<'src> Token<'src> {
    /// Assemble a [Token] out of its kind, its source text and its [Span].
    pub const fn new(kind: TokenKind, text: &'src str, span: Span) -> Self {
        Token { kind, text, span }
    }
}
/// An enum representing all possible kinds of [Token]s.
///
/// The [`Display`] implementation renders each kind as a short phrase suitable for
/// embedding in a message: kinds with fixed spelling are shown in backticks (for
/// example `` `fn` ``), the others as a description (for example "an identifier").
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenKind {
    /// A token that is not valid.
    Invalid,

    // Identifiers and literals
    Identifier,
    IntegerLit,
    BooleanLit,

    // Keywords
    Fn,
    Return,

    // Type names
    I8,
    I16,
    I32,
    I64,
    U8,
    U16,
    U32,
    U64,
    Bool,

    // Operators
    Plus,
    Minus,
    Star,
    Slash,
    Percent,

    // Punctuation
    Dot,
    Comma,
    Colon,
    Semicolon,
    Arrow,

    // Delimiters: parentheses, braces and brackets
    LParen,
    RParen,
    LBrace,
    RBrace,
    LBracket,
    RBracket,
}

impl Display for TokenKind {
    /// Writes the human-readable description of this kind to `f`.
    ///
    /// Formatting flags such as width, fill, alignment and precision are applied to
    /// the description, so `format!("{:>8}", kind)` pads as it would for a `&str`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let description = match self {
            Self::Invalid => "an invalid token",

            Self::Identifier => "an identifier",
            Self::IntegerLit => "an integer",
            Self::BooleanLit => "a boolean",

            Self::Fn => "`fn`",
            Self::Return => "`return`",

            Self::I8 => "`i8`",
            Self::I16 => "`i16`",
            Self::I32 => "`i32`",
            Self::I64 => "`i64`",
            Self::U8 => "`u8`",
            Self::U16 => "`u16`",
            Self::U32 => "`u32`",
            Self::U64 => "`u64`",
            Self::Bool => "`bool`",

            Self::Plus => "`+`",
            Self::Minus => "`-`",
            Self::Star => "`*`",
            Self::Slash => "`/`",
            Self::Percent => "`%`",

            Self::Dot => "`.`",
            Self::Comma => "`,`",
            Self::Colon => "`:`",
            Self::Semicolon => "`;`",
            Self::Arrow => "`->`",

            Self::LParen => "`(`",
            Self::RParen => "`)`",
            Self::LBrace => "`{`",
            Self::RBrace => "`}`",
            Self::LBracket => "`[`",
            Self::RBracket => "`]`",
        };
        // `pad` behaves like `write_str` when no flags are given, but additionally
        // honours width/fill/alignment/precision instead of silently ignoring them.
        f.pad(description)
    }
}
+2
View File
@@ -1,3 +1,5 @@
pub mod frontend;
/// Program entry point; currently it only prints a greeting to standard output.
fn main() {
    let greeting = "Hello, world!";
    println!("{greeting}");
}