feat: add token and span definitions
This commit is contained in:
@@ -7,7 +7,7 @@ A Rust-flavored, C-targeting language - built pipeline-first.
|
|||||||
|
|
||||||
## Phase 1 - Lexer
|
## Phase 1 - Lexer
|
||||||
|
|
||||||
- [ ] Define token enum (int literal, bool literal, ident, keywords, operators, punctuation)
|
- [x] Define token enum (int literal, bool literal, ident, keywords, operators, punctuation)
|
||||||
- [ ] Implement character-by-character scanner loop
|
- [ ] Implement character-by-character scanner loop
|
||||||
- [ ] Handle whitespace & single-line comments (`//`)
|
- [ ] Handle whitespace & single-line comments (`//`)
|
||||||
- [ ] Produce source spans (file, line, col) on every token
|
- [ ] Produce source spans (file, line, col) on every token
|
||||||
|
|||||||
@@ -0,0 +1 @@
|
|||||||
|
pub mod token;
|
||||||
@@ -0,0 +1,123 @@
|
|||||||
|
use std::fmt::Display;
|
||||||
|
|
||||||
|
/// A half-open interval `[start, end)` representing a location in the source text.
///
/// `start` is inclusive and `end` is exclusive, so a span with `start == end` is empty.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Span {
    /// Inclusive lower bound of the interval.
    pub start: usize,
    /// Exclusive upper bound of the interval.
    pub end: usize,
}

impl Span {
    /// Create a new [`Span`] from the start and end positions.
    ///
    /// # Panics
    ///
    /// In builds with debug assertions enabled, panics if `start > end`.
    #[must_use]
    pub const fn new(start: usize, end: usize) -> Self {
        debug_assert!(start <= end);

        Self { start, end }
    }

    /// Create a new extended [`Span`] which contains both `self` and `other`.
    ///
    /// The result runs from the smaller of the two `start`s to the larger of the two `end`s,
    /// so any gap between the two spans is covered as well.
    #[must_use]
    pub const fn join(self, other: Self) -> Self {
        // Explicit comparisons instead of `Ord::min` / `Ord::max`: trait methods cannot be
        // called from a `const fn` on stable Rust, and `new` is already `const`.
        let start = if other.start < self.start {
            other.start
        } else {
            self.start
        };
        let end = if other.end > self.end {
            other.end
        } else {
            self.end
        };

        Self::new(start, end)
    }
}
|
||||||
|
|
||||||
|
/// A fundamental, categorized unit of source code produced during lexical analysis.
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
|
pub struct Token<'src> {
|
||||||
|
pub kind: TokenKind,
|
||||||
|
pub text: &'src str,
|
||||||
|
pub span: Span,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'src> Token<'src> {
|
||||||
|
/// Create a new [Token] from kind, source text and [Span].
|
||||||
|
pub const fn new(kind: TokenKind, text: &'src str, span: Span) -> Self {
|
||||||
|
Self { kind, text, span }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An enum representing all possible kinds of [`Token`]s.
///
/// The [`Display`] implementation renders a short human-readable description of the kind
/// (for example ``an identifier`` or `` `fn` ``), not the text of a concrete token.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenKind {
    /// An invalid token.
    Invalid,

    // Identifiers and literals
    /// An identifier.
    Identifier,
    /// An integer literal.
    IntegerLit,
    /// A boolean literal.
    BooleanLit,

    // Keywords
    /// The `fn` keyword.
    Fn,
    /// The `return` keyword.
    Return,

    // Types
    /// The `i8` type name.
    I8,
    /// The `i16` type name.
    I16,
    /// The `i32` type name.
    I32,
    /// The `i64` type name.
    I64,
    /// The `u8` type name.
    U8,
    /// The `u16` type name.
    U16,
    /// The `u32` type name.
    U32,
    /// The `u64` type name.
    U64,
    /// The `bool` type name.
    Bool,

    // Operators
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `%`
    Percent,

    // Punctuation
    /// `.`
    Dot,
    /// `,`
    Comma,
    /// `:`
    Colon,
    /// `;`
    Semicolon,
    /// `->`
    Arrow,

    // Delimiters
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `{`
    LBrace,
    /// `}`
    RBrace,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
}

impl Display for TokenKind {
    /// Write the human-readable description of this kind.
    ///
    /// Uses [`std::fmt::Formatter::pad`] rather than `write_str`, so width, fill and alignment
    /// given in the format string (e.g. `{:>10}`) are honored instead of silently ignored.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Exhaustive on purpose: adding a variant must fail to compile until it is described.
        let description = match self {
            Self::Invalid => "an invalid token",
            Self::Identifier => "an identifier",
            Self::IntegerLit => "an integer",
            Self::BooleanLit => "a boolean",
            Self::Fn => "`fn`",
            Self::Return => "`return`",
            Self::I8 => "`i8`",
            Self::I16 => "`i16`",
            Self::I32 => "`i32`",
            Self::I64 => "`i64`",
            Self::U8 => "`u8`",
            Self::U16 => "`u16`",
            Self::U32 => "`u32`",
            Self::U64 => "`u64`",
            Self::Bool => "`bool`",
            Self::Plus => "`+`",
            Self::Minus => "`-`",
            Self::Star => "`*`",
            Self::Slash => "`/`",
            Self::Percent => "`%`",
            Self::Dot => "`.`",
            Self::Comma => "`,`",
            Self::Colon => "`:`",
            Self::Semicolon => "`;`",
            Self::Arrow => "`->`",
            Self::LParen => "`(`",
            Self::RParen => "`)`",
            Self::LBrace => "`{`",
            Self::RBrace => "`}`",
            Self::LBracket => "`[`",
            Self::RBracket => "`]`",
        };

        f.pad(description)
    }
}
|
||||||
@@ -1,3 +1,5 @@
|
|||||||
|
pub mod frontend;
|
||||||
|
|
||||||
/// Program entry point; currently it only prints a greeting to standard output.
fn main() {
    println!("Hello, world!");
}
|
||||||
|
|||||||
Reference in New Issue
Block a user