Feat: add compound assignment and shift operators
Compound assignment: `+=`, `-=`, `*=`, `/=`, `%=`, `&=`, `|=`, `^=`, `<<=`, `>>=`. Shift: `<<`, `>>`. Each compound-assignment token parses at the same precedence as `=` (right-associative, lowest) and produces `ExprKind::CompoundAssign`. Shifts parse between additive and multiplicative precedence, i.e. they bind tighter than `+`/`-` and looser than `*`/`/`/`%`. GRAMMAR.ebnf and SYNTAX.md are updated accordingly.
This commit is contained in:
44
GRAMMAR.ebnf
44
GRAMMAR.ebnf
@@ -36,20 +36,27 @@ top_level_def = func_def
|
||||
expr = assign_expr ;
|
||||
|
||||
|
||||
(* --- Assignment (lowest-precedence binary operator) --- *)
|
||||
(* --- Assignment and compound assignment (lowest precedence) --- *)
|
||||
(* *)
|
||||
(* Uses token `=`; right-associative via recursion. *)
|
||||
(* The optional form encodes at-most-one assignment target: chains *)
|
||||
(* like `a = b = c` parse as `a = (b = c)` thanks to right *)
|
||||
(* recursion. *)
|
||||
(* assign_op covers `=` and all compound-assignment operators. *)
|
||||
(* All have the same precedence and are right-associative: *)
|
||||
(* `a = b = c` → `a = (b = c)` *)
|
||||
(* `a += b += c` → `a += (b += c)` (unusual but syntactically *)
|
||||
(* valid; semantics checked later) *)
|
||||
(* *)
|
||||
(* Compound assignments expand semantically: *)
|
||||
(* `x += y` → `x = x + y` *)
|
||||
(* `x -= y` → `x = x - y` etc. *)
|
||||
(* *)
|
||||
(* LL(1): after or_expr, peek at next token. *)
|
||||
(* "=" → consume and recurse into assign_expr *)
|
||||
(* assign_op token → consume and recurse into assign_expr *)
|
||||
(* other → return the or_expr as-is *)
|
||||
(* "=" is not in FIRST(stmt), so expr_stmt can still be *)
|
||||
(* distinguished from other statement kinds. *)
|
||||
(* None of the assign_op tokens are in FIRST(stmt), so expr_stmt *)
|
||||
(* remains unambiguous. *)
|
||||
|
||||
assign_expr = or_expr , [ "=" , assign_expr ] ;
|
||||
assign_expr = or_expr , [ assign_op , assign_expr ] ;
|
||||
|
||||
assign_op = "=" | "+=" | "-=" | "*=" | "/=" | "%=" | "&=" | "|=" | "^=" | "<<=" | ">>=" ;
|
||||
|
||||
|
||||
(* --- Logical OR (lowest-precedence binary operator) --- *)
|
||||
@@ -83,8 +90,14 @@ bitand_expr = additive_expr , { "&" , additive_expr } ;
|
||||
|
||||
(* --- Additive: addition and subtraction --- *)
|
||||
|
||||
additive_expr = multiplicative_expr ,
|
||||
{ ( "+" | "-" ) , multiplicative_expr } ;
|
||||
additive_expr = shift_expr ,
|
||||
{ ( "+" | "-" ) , shift_expr } ;
|
||||
|
||||
|
||||
(* --- Shift: left shift and right shift --- *)
|
||||
|
||||
shift_expr = multiplicative_expr ,
|
||||
{ ( "<<" | ">>" ) , multiplicative_expr } ;
|
||||
|
||||
|
||||
(* --- Multiplicative: multiplication, division, modulo --- *)
|
||||
@@ -183,7 +196,7 @@ arg_list = [ expr , { "," , expr } ] ;
|
||||
|
||||
expr_ns = assign_expr_ns ;
|
||||
|
||||
assign_expr_ns = or_expr_ns , [ "=" , assign_expr_ns ] ;
|
||||
assign_expr_ns = or_expr_ns , [ assign_op , assign_expr_ns ] ;
|
||||
|
||||
or_expr_ns = and_expr_ns , { "or" , and_expr_ns } ;
|
||||
and_expr_ns = bitor_expr_ns , { "and" , bitor_expr_ns } ;
|
||||
@@ -192,8 +205,11 @@ bitor_expr_ns = bitxor_expr_ns , { "|" , bitxor_expr_ns } ;
|
||||
bitxor_expr_ns = bitand_expr_ns , { "^" , bitand_expr_ns } ;
|
||||
bitand_expr_ns = additive_expr_ns , { "&" , additive_expr_ns } ;
|
||||
|
||||
additive_expr_ns = multiplicative_expr_ns ,
|
||||
{ ( "+" | "-" ) , multiplicative_expr_ns } ;
|
||||
additive_expr_ns = shift_expr_ns ,
|
||||
{ ( "+" | "-" ) , shift_expr_ns } ;
|
||||
|
||||
shift_expr_ns = multiplicative_expr_ns ,
|
||||
{ ( "<<" | ">>" ) , multiplicative_expr_ns } ;
|
||||
|
||||
multiplicative_expr_ns = unary_expr_ns ,
|
||||
{ ( "*" | "/" | "%" ) , unary_expr_ns } ;
|
||||
|
||||
39
SYNTAX.md
39
SYNTAX.md
@@ -25,7 +25,7 @@ appear as UPPERCASE terminals in `GRAMMAR.ebnf`.
|
||||
### Operator Tokens
|
||||
|
||||
| Token | Lexeme | Description |
|
||||
| --------- | ------ | -------------------------------------- |
|
||||
| ------------ | ------ | -------------------------------------- |
|
||||
| `PLUS` | `+` | Addition / unary plus (not in grammar) |
|
||||
| `MINUS` | `-` | Subtraction / unary negation |
|
||||
| `STAR` | `*` | Multiplication / pointer dereference |
|
||||
@@ -37,7 +37,19 @@ appear as UPPERCASE terminals in `GRAMMAR.ebnf`.
|
||||
| `BANG` | `!` | Logical NOT |
|
||||
| `TILDE` | `~` | Bitwise NOT |
|
||||
| `DOT` | `.` | Member access |
|
||||
| `SHL` | `<<` | Left shift |
|
||||
| `SHR` | `>>` | Right shift |
|
||||
| `EQ` | `=` | Assignment |
|
||||
| `PLUS_EQ` | `+=` | Add-assign |
|
||||
| `MINUS_EQ` | `-=` | Subtract-assign |
|
||||
| `STAR_EQ` | `*=` | Multiply-assign |
|
||||
| `SLASH_EQ` | `/=` | Divide-assign |
|
||||
| `PERCENT_EQ` | `%=` | Modulo-assign |
|
||||
| `AMP_EQ` | `&=` | Bitwise-AND-assign |
|
||||
| `PIPE_EQ` | `\|=` | Bitwise-OR-assign |
|
||||
| `CARET_EQ` | `^=` | Bitwise-XOR-assign |
|
||||
| `SHL_EQ` | `<<=` | Left-shift-assign |
|
||||
| `SHR_EQ` | `>>=` | Right-shift-assign |
|
||||
|
||||
### Keyword Tokens
|
||||
|
||||
@@ -128,18 +140,19 @@ tightly).
|
||||
### Operator Precedence Table
|
||||
|
||||
| Level | Operators | Associativity | Description |
|
||||
| ----- | --------------------------- | -------------- | -------------------------------- |
|
||||
| 1 | `=` | right | Assignment (lowest) |
|
||||
| ----- | -------------------------------------------------------- | -------------- | -------------------------------- |
|
||||
| 1 | `=` `+=` `-=` `*=` `/=` `%=` `&=` `\|=` `^=` `<<=` `>>=` | right | Assignment (lowest) |
|
||||
| 2 | `or` | left | Logical OR |
|
||||
| 3 | `and` | left | Logical AND |
|
||||
| 4 | `\|` | left | Bitwise OR |
|
||||
| 5 | `^` | left | Bitwise XOR |
|
||||
| 6 | `&` | left | Bitwise AND |
|
||||
| 7 | `+` `-` | left | Addition, subtraction |
|
||||
| 8 | `*` `/` `%` | left | Multiplication, division, modulo |
|
||||
| 9 | `!` `~` `-` `*` `&` | right (unary) | Prefix unary operators |
|
||||
| 10 | `.` `[…]` `(…)` | left (postfix) | Member access, index, call |
|
||||
| 11 | literals, identifiers, `()` | — | Primary expressions (highest) |
|
||||
| 8 | `<<` `>>` | left | Bit shift |
|
||||
| 9 | `*` `/` `%` | left | Multiplication, division, modulo |
|
||||
| 10 | `!` `~` `-` `*` `&` | right (unary) | Prefix unary operators |
|
||||
| 11 | `.` `[…]` `(…)` | left (postfix) | Member access, index, call |
|
||||
| 12 | literals, identifiers, `()` | — | Primary expressions (highest) |
|
||||
|
||||
### Operator Descriptions
|
||||
|
||||
@@ -148,6 +161,18 @@ tightly).
|
||||
| Operator | Name | Example | Notes |
|
||||
| -------- | -------------- | --------- | ---------------------------------------------- |
|
||||
| `=` | Assignment | `a = b` | Right-associative; `a = b = c` → `a = (b = c)` |
|
||||
| `+=` | Add-assign | `a += b` | Expands to `a = a + b` |
|
||||
| `-=` | Sub-assign | `a -= b` | Expands to `a = a - b` |
|
||||
| `*=` | Mul-assign | `a *= b` | Expands to `a = a * b` |
|
||||
| `/=` | Div-assign | `a /= b` | Expands to `a = a / b` |
|
||||
| `%=` | Rem-assign | `a %= b` | Expands to `a = a % b` |
|
||||
| `&=` | BitAnd-assign | `a &= b` | Expands to `a = a & b` |
|
||||
| `\|=` | BitOr-assign | `a \|= b` | Expands to `a = a \| b` |
|
||||
| `^=` | BitXor-assign | `a ^= b` | Expands to `a = a ^ b` |
|
||||
| `<<` | Left shift | `a << b` | Shift `a` left by `b` bits; integer types |
|
||||
| `>>` | Right shift | `a >> b` | Shift `a` right by `b` bits; integer types |
|
||||
| `<<=` | Shl-assign | `a <<= b` | Expands to `a = a << b` |
|
||||
| `>>=` | Shr-assign | `a >>= b` | Expands to `a = a >> b` |
|
||||
| `or` | Logical OR | `a or b` | Short-circuits; both operands must be `bool` |
|
||||
| `and` | Logical AND | `a and b` | Short-circuits; both operands must be `bool` |
|
||||
| `\|` | Bitwise OR | `a \| b` | Integer types |
|
||||
|
||||
@@ -11,6 +11,20 @@ pub enum UnaryOp {
|
||||
AddrOf, // `&`
|
||||
}
|
||||
|
||||
/// Operator carried by a compound-assignment expression (`lhs op= rhs`).
///
/// Each variant names the binary operation that its `op=` token combines
/// with assignment; the lexeme is shown next to the variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompoundAssignOp {
    Add,    // `+=`
    Sub,    // `-=`
    Mul,    // `*=`
    Div,    // `/=`
    Rem,    // `%=`
    BitAnd, // `&=`
    BitOr,  // `|=`
    BitXor, // `^=`
    Shl,    // `<<=`
    Shr,    // `>>=`
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum BinaryOp {
|
||||
// Logical
|
||||
@@ -33,6 +47,9 @@ pub enum BinaryOp {
|
||||
Mul, // `*`
|
||||
Div, // `/`
|
||||
Rem, // `%`
|
||||
// Shift
|
||||
Shl, // `<<`
|
||||
Shr, // `>>`
|
||||
// Assignment (lowest precedence, right-associative)
|
||||
Assign, // `=`
|
||||
}
|
||||
@@ -123,6 +140,13 @@ pub enum ExprKind {
|
||||
lhs: Box<Expr>,
|
||||
rhs: Box<Expr>,
|
||||
},
|
||||
// Compound assignment: `lhs op= rhs` (expands to `lhs = lhs op rhs`)
|
||||
CompoundAssign {
|
||||
op: CompoundAssignOp,
|
||||
op_span: Span,
|
||||
lhs: Box<Expr>,
|
||||
rhs: Box<Expr>,
|
||||
},
|
||||
|
||||
// Postfix
|
||||
Field {
|
||||
|
||||
@@ -267,14 +267,63 @@ impl<'src> Lexer<'src> {
|
||||
self.advance();
|
||||
|
||||
let kind = match c {
|
||||
// ── Unambiguous single-character tokens ──────────────────────────
|
||||
'+' => TokenKind::Plus,
|
||||
'*' => TokenKind::Star,
|
||||
'/' => TokenKind::Slash,
|
||||
'%' => TokenKind::Percent,
|
||||
'&' => TokenKind::Amp,
|
||||
'|' => TokenKind::Pipe,
|
||||
'^' => TokenKind::Caret,
|
||||
// ── Tokens that may be the prefix of a compound-assignment ───────
|
||||
'+' => {
|
||||
if self.peek() == Some('=') {
|
||||
self.advance();
|
||||
TokenKind::PlusEq
|
||||
} else {
|
||||
TokenKind::Plus
|
||||
}
|
||||
}
|
||||
'*' => {
|
||||
if self.peek() == Some('=') {
|
||||
self.advance();
|
||||
TokenKind::StarEq
|
||||
} else {
|
||||
TokenKind::Star
|
||||
}
|
||||
}
|
||||
'/' => {
|
||||
if self.peek() == Some('=') {
|
||||
self.advance();
|
||||
TokenKind::SlashEq
|
||||
} else {
|
||||
TokenKind::Slash
|
||||
}
|
||||
}
|
||||
'%' => {
|
||||
if self.peek() == Some('=') {
|
||||
self.advance();
|
||||
TokenKind::PercentEq
|
||||
} else {
|
||||
TokenKind::Percent
|
||||
}
|
||||
}
|
||||
'&' => {
|
||||
if self.peek() == Some('=') {
|
||||
self.advance();
|
||||
TokenKind::AmpEq
|
||||
} else {
|
||||
TokenKind::Amp
|
||||
}
|
||||
}
|
||||
'|' => {
|
||||
if self.peek() == Some('=') {
|
||||
self.advance();
|
||||
TokenKind::PipeEq
|
||||
} else {
|
||||
TokenKind::Pipe
|
||||
}
|
||||
}
|
||||
'^' => {
|
||||
if self.peek() == Some('=') {
|
||||
self.advance();
|
||||
TokenKind::CaretEq
|
||||
} else {
|
||||
TokenKind::Caret
|
||||
}
|
||||
}
|
||||
'~' => TokenKind::Tilde,
|
||||
'.' => TokenKind::Dot,
|
||||
'(' => TokenKind::LParen,
|
||||
@@ -292,6 +341,9 @@ impl<'src> Lexer<'src> {
|
||||
if self.peek() == Some('>') {
|
||||
self.advance();
|
||||
TokenKind::Arrow
|
||||
} else if self.peek() == Some('=') {
|
||||
self.advance();
|
||||
TokenKind::MinusEq
|
||||
} else {
|
||||
TokenKind::Minus
|
||||
}
|
||||
@@ -313,7 +365,13 @@ impl<'src> Lexer<'src> {
|
||||
}
|
||||
}
|
||||
'<' => {
|
||||
if self.peek() == Some('=') {
|
||||
if self.at_ascii2(b'<', b'=') {
|
||||
self.pos += 2;
|
||||
TokenKind::ShlEq
|
||||
} else if self.peek() == Some('<') {
|
||||
self.advance();
|
||||
TokenKind::Shl
|
||||
} else if self.peek() == Some('=') {
|
||||
self.advance();
|
||||
TokenKind::LtEq
|
||||
} else {
|
||||
@@ -321,7 +379,13 @@ impl<'src> Lexer<'src> {
|
||||
}
|
||||
}
|
||||
'>' => {
|
||||
if self.peek() == Some('=') {
|
||||
if self.at_ascii2(b'>', b'=') {
|
||||
self.pos += 2;
|
||||
TokenKind::ShrEq
|
||||
} else if self.peek() == Some('>') {
|
||||
self.advance();
|
||||
TokenKind::Shr
|
||||
} else if self.peek() == Some('=') {
|
||||
self.advance();
|
||||
TokenKind::GtEq
|
||||
} else {
|
||||
@@ -518,6 +582,31 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
/// Every compound-assignment lexeme must lex to its own dedicated token,
/// in source order, followed by `Eof`.
#[test]
fn compound_assign_operators() {
    let src = "+= -= *= /= %= &= |= ^= <<= >>=";
    assert_eq!(
        kinds(src),
        vec![
            PlusEq, MinusEq, StarEq, SlashEq, PercentEq, AmpEq, PipeEq, CaretEq, ShlEq,
            ShrEq, Eof
        ]
    );
}
|
||||
|
||||
/// Plain shifts and their compound-assignment forms must lex as distinct
/// tokens: `<<`/`>>` as `Shl`/`Shr`, `<<=`/`>>=` as `ShlEq`/`ShrEq`.
#[test]
fn shift_operators() {
    let src = "<< >> <<= >>=";
    assert_eq!(kinds(src), vec![Shl, Shr, ShlEq, ShrEq, Eof]);
}
|
||||
|
||||
/// The shift branches in the lexer must not change how the existing
/// comparison operators are tokenized.
#[test]
fn shift_does_not_steal_comparison() {
    // A lone `<` or `>` (not followed by another `<`/`>` or `=`) must still
    // lex as Lt / Gt rather than being folded into a shift token.
    let src = "a < b > c";
    assert_eq!(kinds(src), vec![Ident, Lt, Ident, Gt, Ident, Eof]);

    // `<=` and `>=` are checked after the `<<=`/`<<` and `>>=`/`>>` branches
    // in the lexer; they must still come out as LtEq / GtEq.
    let src = "a <= b >= c";
    assert_eq!(kinds(src), vec![Ident, LtEq, Ident, GtEq, Ident, Eof]);
}
|
||||
|
||||
#[test]
|
||||
fn punctuation() {
|
||||
assert_eq!(
|
||||
|
||||
@@ -2,8 +2,9 @@ use std::fmt;
|
||||
|
||||
use crate::{
|
||||
ast::{
|
||||
BinaryOp, Block, ElseBranch, Expr, ExprKind, FieldDef, FuncDef, Param, Program, Stmt,
|
||||
StmtKind, StructDef, StructField, TopLevelDef, TopLevelDefKind, Type, UnaryOp,
|
||||
BinaryOp, Block, CompoundAssignOp, ElseBranch, Expr, ExprKind, FieldDef, FuncDef, Param,
|
||||
Program, Stmt, StmtKind, StructDef, StructField, TopLevelDef, TopLevelDefKind, Type,
|
||||
UnaryOp,
|
||||
},
|
||||
lexer::Lexer,
|
||||
token::{Span, Token, TokenKind},
|
||||
@@ -35,9 +36,18 @@ impl fmt::Display for ParseError {
|
||||
|
||||
fn infix_bp(kind: TokenKind) -> Option<(u8, u8)> {
|
||||
let bp = match kind {
|
||||
// Assignment: lowest precedence, right-associative (left_bp == right_bp).
|
||||
// `a = b = c` → `a = (b = c)`.
|
||||
TokenKind::Eq => (2, 2),
|
||||
// Assignment and compound assignment: lowest precedence, right-associative.
|
||||
TokenKind::Eq
|
||||
| TokenKind::PlusEq
|
||||
| TokenKind::MinusEq
|
||||
| TokenKind::StarEq
|
||||
| TokenKind::SlashEq
|
||||
| TokenKind::PercentEq
|
||||
| TokenKind::AmpEq
|
||||
| TokenKind::PipeEq
|
||||
| TokenKind::CaretEq
|
||||
| TokenKind::ShlEq
|
||||
| TokenKind::ShrEq => (2, 2),
|
||||
TokenKind::Or => (10, 11),
|
||||
TokenKind::And => (20, 21),
|
||||
TokenKind::Pipe => (30, 31),
|
||||
@@ -50,6 +60,7 @@ fn infix_bp(kind: TokenKind) -> Option<(u8, u8)> {
|
||||
| TokenKind::LtEq
|
||||
| TokenKind::GtEq => (55, 56),
|
||||
TokenKind::Plus | TokenKind::Minus => (60, 61),
|
||||
TokenKind::Shl | TokenKind::Shr => (65, 66),
|
||||
TokenKind::Star | TokenKind::Slash | TokenKind::Percent => (70, 71),
|
||||
// Postfix: `.`, `[`, `(` — handled separately in parse_led, bp listed
|
||||
// here only so callers can detect them as infix/postfix operators.
|
||||
@@ -82,6 +93,22 @@ fn token_to_unary_op(kind: TokenKind) -> UnaryOp {
|
||||
}
|
||||
}
|
||||
|
||||
fn token_to_compound_assign_op(kind: TokenKind) -> Option<CompoundAssignOp> {
|
||||
match kind {
|
||||
TokenKind::PlusEq => Some(CompoundAssignOp::Add),
|
||||
TokenKind::MinusEq => Some(CompoundAssignOp::Sub),
|
||||
TokenKind::StarEq => Some(CompoundAssignOp::Mul),
|
||||
TokenKind::SlashEq => Some(CompoundAssignOp::Div),
|
||||
TokenKind::PercentEq => Some(CompoundAssignOp::Rem),
|
||||
TokenKind::AmpEq => Some(CompoundAssignOp::BitAnd),
|
||||
TokenKind::PipeEq => Some(CompoundAssignOp::BitOr),
|
||||
TokenKind::CaretEq => Some(CompoundAssignOp::BitXor),
|
||||
TokenKind::ShlEq => Some(CompoundAssignOp::Shl),
|
||||
TokenKind::ShrEq => Some(CompoundAssignOp::Shr),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn token_to_binary_op(kind: TokenKind) -> BinaryOp {
|
||||
match kind {
|
||||
TokenKind::Or => BinaryOp::Or,
|
||||
@@ -100,6 +127,8 @@ fn token_to_binary_op(kind: TokenKind) -> BinaryOp {
|
||||
TokenKind::Star => BinaryOp::Mul,
|
||||
TokenKind::Slash => BinaryOp::Div,
|
||||
TokenKind::Percent => BinaryOp::Rem,
|
||||
TokenKind::Shl => BinaryOp::Shl,
|
||||
TokenKind::Shr => BinaryOp::Shr,
|
||||
TokenKind::Eq => BinaryOp::Assign,
|
||||
_ => unreachable!("not a binary op: {:?}", kind),
|
||||
}
|
||||
@@ -588,6 +617,22 @@ impl<'src> Parser<'src> {
|
||||
)
|
||||
}
|
||||
|
||||
// Compound assignment: `lhs op= rhs`
|
||||
kind if token_to_compound_assign_op(kind).is_some() => {
|
||||
let op = token_to_compound_assign_op(kind).unwrap();
|
||||
let rhs = self.pratt(r_bp, allow_struct_lit);
|
||||
let span = lhs.span.cover(rhs.span);
|
||||
Expr::new(
|
||||
ExprKind::CompoundAssign {
|
||||
op,
|
||||
op_span: op_tok.span,
|
||||
lhs: Box::new(lhs),
|
||||
rhs: Box::new(rhs),
|
||||
},
|
||||
span,
|
||||
)
|
||||
}
|
||||
|
||||
// Binary operator
|
||||
kind => {
|
||||
let op = token_to_binary_op(kind);
|
||||
|
||||
@@ -85,6 +85,10 @@ define_tokens! {
|
||||
Bang => "`!`",
|
||||
Tilde => "`~`",
|
||||
|
||||
// ── Shift operators ───────────────────────────────────────────────────────
|
||||
Shl => "`<<`",
|
||||
Shr => "`>>`",
|
||||
|
||||
// ── Comparison operators ──────────────────────────────────────────────────
|
||||
EqEq => "`==`",
|
||||
BangEq => "`!=`",
|
||||
@@ -95,6 +99,16 @@ define_tokens! {
|
||||
|
||||
// ── Assignment ────────────────────────────────────────────────────────────
|
||||
Eq => "`=`",
|
||||
PlusEq => "`+=`",
|
||||
MinusEq => "`-=`",
|
||||
StarEq => "`*=`",
|
||||
SlashEq => "`/=`",
|
||||
PercentEq => "`%=`",
|
||||
AmpEq => "`&=`",
|
||||
PipeEq => "`|=`",
|
||||
CaretEq => "`^=`",
|
||||
ShlEq => "`<<=`",
|
||||
ShrEq => "`>>=`",
|
||||
|
||||
// ── Punctuation ───────────────────────────────────────────────────────────
|
||||
Arrow => "`->`",
|
||||
|
||||
Reference in New Issue
Block a user