Feat: add compound assignment and shift operators

Compound assignment: +=, -=, *=, /=, %=, &=, |=, ^=, <<=, >>=
Shift: <<, >>

Each compound assignment token parses at the same precedence as `=`
(right-associative, lowest) and produces ExprKind::CompoundAssign.
Shifts parse between additive and multiplicative precedence.
GRAMMAR.ebnf and SYNTAX.md updated accordingly.
This commit is contained in:
2026-03-10 18:29:52 +01:00
parent 1a4e464d5e
commit a82b7e4633
6 changed files with 269 additions and 56 deletions

View File

@@ -36,20 +36,27 @@ top_level_def = func_def
expr = assign_expr ; expr = assign_expr ;
(* --- Assignment (lowest-precedence binary operator) --- *) (* --- Assignment and compound assignment (lowest precedence) --- *)
(* *) (* *)
(* Uses token `=`; right-associative via recursion. *) (* assign_op covers `=` and all compound-assignment operators. *)
(* The optional form encodes at-most-one assignment target: chains *) (* All have the same precedence and are right-associative: *)
like `a = b = c` parse as `a = (b = c)` thanks to right *) (* `a = b = c` → `a = (b = c)` *)
recursion. *) (* `a += b += c` → `a += (b += c)` (unusual but syntactically *)
(* valid; semantics checked later) *)
(* *)
(* Compound assignments expand semantically: *)
(* `x += y` → `x = x + y` *)
(* `x -= y` → `x = x - y` etc. *)
(* *) (* *)
(* LL(1): after or_expr, peek at next token. *) (* LL(1): after or_expr, peek at next token. *)
(* "=" → consume and recurse into assign_expr *) (* assign_op token → consume and recurse into assign_expr *)
(* other → return the or_expr as-is *) (* other → return the or_expr as-is *)
(* "=" is not in FIRST(stmt), so expr_stmt can still be *) (* None of the assign_op tokens are in FIRST(stmt), so expr_stmt *)
(* distinguished from other statement kinds. *) (* remains unambiguous. *)
assign_expr = or_expr , [ "=" , assign_expr ] ; assign_expr = or_expr , [ assign_op , assign_expr ] ;
assign_op = "=" | "+=" | "-=" | "*=" | "/=" | "%=" | "&=" | "|=" | "^=" | "<<=" | ">>=" ;
(* --- Logical OR (lowest-precedence non-assignment binary operator) --- *) (* --- Logical OR (lowest-precedence non-assignment binary operator) --- *)
@@ -83,8 +90,14 @@ bitand_expr = additive_expr , { "&" , additive_expr } ;
(* --- Additive: addition and subtraction --- *) (* --- Additive: addition and subtraction --- *)
additive_expr = multiplicative_expr , additive_expr = shift_expr ,
{ ( "+" | "-" ) , multiplicative_expr } ; { ( "+" | "-" ) , shift_expr } ;
(* --- Shift: left shift and right shift --- *)
shift_expr = multiplicative_expr ,
{ ( "<<" | ">>" ) , multiplicative_expr } ;
(* --- Multiplicative: multiplication, division, modulo --- *) (* --- Multiplicative: multiplication, division, modulo --- *)
@@ -183,7 +196,7 @@ arg_list = [ expr , { "," , expr } ] ;
expr_ns = assign_expr_ns ; expr_ns = assign_expr_ns ;
assign_expr_ns = or_expr_ns , [ "=" , assign_expr_ns ] ; assign_expr_ns = or_expr_ns , [ assign_op , assign_expr_ns ] ;
or_expr_ns = and_expr_ns , { "or" , and_expr_ns } ; or_expr_ns = and_expr_ns , { "or" , and_expr_ns } ;
and_expr_ns = bitor_expr_ns , { "and" , bitor_expr_ns } ; and_expr_ns = bitor_expr_ns , { "and" , bitor_expr_ns } ;
@@ -192,8 +205,11 @@ bitor_expr_ns = bitxor_expr_ns , { "|" , bitxor_expr_ns } ;
bitxor_expr_ns = bitand_expr_ns , { "^" , bitand_expr_ns } ; bitxor_expr_ns = bitand_expr_ns , { "^" , bitand_expr_ns } ;
bitand_expr_ns = additive_expr_ns , { "&" , additive_expr_ns } ; bitand_expr_ns = additive_expr_ns , { "&" , additive_expr_ns } ;
additive_expr_ns = multiplicative_expr_ns , additive_expr_ns = shift_expr_ns ,
{ ( "+" | "-" ) , multiplicative_expr_ns } ; { ( "+" | "-" ) , shift_expr_ns } ;
shift_expr_ns = multiplicative_expr_ns ,
{ ( "<<" | ">>" ) , multiplicative_expr_ns } ;
multiplicative_expr_ns = unary_expr_ns , multiplicative_expr_ns = unary_expr_ns ,
{ ( "*" | "/" | "%" ) , unary_expr_ns } ; { ( "*" | "/" | "%" ) , unary_expr_ns } ;

View File

@@ -24,20 +24,32 @@ appear as UPPERCASE terminals in `GRAMMAR.ebnf`.
### Operator Tokens ### Operator Tokens
| Token | Lexeme | Description | | Token | Lexeme | Description |
| --------- | ------ | -------------------------------------- | | ------------ | ------ | -------------------------------------- |
| `PLUS` | `+` | Addition / unary plus (not in grammar) | | `PLUS` | `+` | Addition / unary plus (not in grammar) |
| `MINUS` | `-` | Subtraction / unary negation | | `MINUS` | `-` | Subtraction / unary negation |
| `STAR` | `*` | Multiplication / pointer dereference | | `STAR` | `*` | Multiplication / pointer dereference |
| `SLASH` | `/` | Division | | `SLASH` | `/` | Division |
| `PERCENT` | `%` | Modulo (remainder) | | `PERCENT` | `%` | Modulo (remainder) |
| `AMP` | `&` | Bitwise AND / address-of | | `AMP` | `&` | Bitwise AND / address-of |
| `PIPE` | `\|` | Bitwise OR | | `PIPE` | `\|` | Bitwise OR |
| `CARET` | `^` | Bitwise XOR | | `CARET` | `^` | Bitwise XOR |
| `BANG` | `!` | Logical NOT | | `BANG` | `!` | Logical NOT |
| `TILDE` | `~` | Bitwise NOT | | `TILDE` | `~` | Bitwise NOT |
| `DOT` | `.` | Member access | | `DOT` | `.` | Member access |
| `EQ` | `=` | Assignment | | `SHL` | `<<` | Left shift |
| `SHR` | `>>` | Right shift |
| `EQ` | `=` | Assignment |
| `PLUS_EQ` | `+=` | Add-assign |
| `MINUS_EQ` | `-=` | Subtract-assign |
| `STAR_EQ` | `*=` | Multiply-assign |
| `SLASH_EQ` | `/=` | Divide-assign |
| `PERCENT_EQ` | `%=` | Modulo-assign |
| `AMP_EQ` | `&=` | Bitwise-AND-assign |
| `PIPE_EQ` | `\|=` | Bitwise-OR-assign |
| `CARET_EQ` | `^=` | Bitwise-XOR-assign |
| `SHL_EQ` | `<<=` | Left-shift-assign |
| `SHR_EQ` | `>>=` | Right-shift-assign |
### Keyword Tokens ### Keyword Tokens
@@ -127,19 +139,20 @@ tightly).
### Operator Precedence Table ### Operator Precedence Table
| Level | Operators | Associativity | Description | | Level | Operators | Associativity | Description |
| ----- | --------------------------- | -------------- | -------------------------------- | | ----- | -------------------------------------------------------- | -------------- | -------------------------------- |
| 1 | `=` | right | Assignment (lowest) | | 1 | `=` `+=` `-=` `*=` `/=` `%=` `&=` `\|=` `^=` `<<=` `>>=` | right | Assignment (lowest) |
| 2 | `or` | left | Logical OR | | 2 | `or` | left | Logical OR |
| 3 | `and` | left | Logical AND | | 3 | `and` | left | Logical AND |
| 4 | `\|` | left | Bitwise OR | | 4 | `\|` | left | Bitwise OR |
| 5 | `^` | left | Bitwise XOR | | 5 | `^` | left | Bitwise XOR |
| 6 | `&` | left | Bitwise AND | | 6 | `&` | left | Bitwise AND |
| 7 | `+` `-` | left | Addition, subtraction | | 7 | `+` `-` | left | Addition, subtraction |
| 8 | `*` `/` `%` | left | Multiplication, division, modulo | | 8 | `<<` `>>` | left | Bit shift |
| 9 | `!` `~` `-` `*` `&` | right (unary) | Prefix unary operators | | 9 | `*` `/` `%` | left | Multiplication, division, modulo |
| 10 | `.` `[…]` `(…)` | left (postfix) | Member access, index, call | | 10 | `!` `~` `-` `*` `&` | right (unary) | Prefix unary operators |
| 11 | literals, identifiers, `()` | — | Primary expressions (highest) | | 11 | `.` `[…]` `()` | left (postfix) | Member access, index, call |
| 12 | literals, identifiers, `()` | — | Primary expressions (highest) |
### Operator Descriptions ### Operator Descriptions
@@ -148,6 +161,18 @@ tightly).
| Operator | Name | Example | Notes | | Operator | Name | Example | Notes |
| -------- | -------------- | --------- | ---------------------------------------------- | | -------- | -------------- | --------- | ---------------------------------------------- |
| `=` | Assignment | `a = b` | Right-associative; `a = b = c` → `a = (b = c)` | | `=` | Assignment | `a = b` | Right-associative; `a = b = c` → `a = (b = c)` |
| `+=` | Add-assign | `a += b` | Expands to `a = a + b` |
| `-=` | Sub-assign | `a -= b` | Expands to `a = a - b` |
| `*=` | Mul-assign | `a *= b` | Expands to `a = a * b` |
| `/=` | Div-assign | `a /= b` | Expands to `a = a / b` |
| `%=` | Rem-assign | `a %= b` | Expands to `a = a % b` |
| `&=` | BitAnd-assign | `a &= b` | Expands to `a = a & b` |
| `\|=` | BitOr-assign | `a \|= b` | Expands to `a = a \| b` |
| `^=` | BitXor-assign | `a ^= b` | Expands to `a = a ^ b` |
| `<<` | Left shift | `a << b` | Shift `a` left by `b` bits; integer types |
| `>>` | Right shift | `a >> b` | Shift `a` right by `b` bits; integer types |
| `<<=` | Shl-assign | `a <<= b` | Expands to `a = a << b` |
| `>>=` | Shr-assign | `a >>= b` | Expands to `a = a >> b` |
| `or` | Logical OR | `a or b` | Short-circuits; both operands must be `bool` | | `or` | Logical OR | `a or b` | Short-circuits; both operands must be `bool` |
| `and` | Logical AND | `a and b` | Short-circuits; both operands must be `bool` | | `and` | Logical AND | `a and b` | Short-circuits; both operands must be `bool` |
| `\|` | Bitwise OR | `a \| b` | Integer types | | `\|` | Bitwise OR | `a \| b` | Integer types |

View File

@@ -11,6 +11,20 @@ pub enum UnaryOp {
AddrOf, // `&` AddrOf, // `&`
} }
/// Operator carried by a compound assignment expression (`lhs op= rhs`).
///
/// Each variant is annotated with the source token it stands for. This enum
/// only identifies the operator; the operands and the operator's span are
/// stored alongside it in `ExprKind::CompoundAssign`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompoundAssignOp {
Add, // `+=`
Sub, // `-=`
Mul, // `*=`
Div, // `/=`
Rem, // `%=`
BitAnd, // `&=`
BitOr, // `|=`
BitXor, // `^=`
Shl, // `<<=`
Shr, // `>>=`
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BinaryOp { pub enum BinaryOp {
// Logical // Logical
@@ -33,6 +47,9 @@ pub enum BinaryOp {
Mul, // `*` Mul, // `*`
Div, // `/` Div, // `/`
Rem, // `%` Rem, // `%`
// Shift
Shl, // `<<`
Shr, // `>>`
// Assignment (lowest precedence, right-associative) // Assignment (lowest precedence, right-associative)
Assign, // `=` Assign, // `=`
} }
@@ -123,6 +140,13 @@ pub enum ExprKind {
lhs: Box<Expr>, lhs: Box<Expr>,
rhs: Box<Expr>, rhs: Box<Expr>,
}, },
// Compound assignment: `lhs op= rhs` (expands to `lhs = lhs op rhs`)
CompoundAssign {
op: CompoundAssignOp,
op_span: Span,
lhs: Box<Expr>,
rhs: Box<Expr>,
},
// Postfix // Postfix
Field { Field {

View File

@@ -267,14 +267,63 @@ impl<'src> Lexer<'src> {
self.advance(); self.advance();
let kind = match c { let kind = match c {
// ── Unambiguous single-character tokens ────────────────────────── // ── Tokens that may be the prefix of a compound-assignment ───────
'+' => TokenKind::Plus, '+' => {
'*' => TokenKind::Star, if self.peek() == Some('=') {
'/' => TokenKind::Slash, self.advance();
'%' => TokenKind::Percent, TokenKind::PlusEq
'&' => TokenKind::Amp, } else {
'|' => TokenKind::Pipe, TokenKind::Plus
'^' => TokenKind::Caret, }
}
'*' => {
if self.peek() == Some('=') {
self.advance();
TokenKind::StarEq
} else {
TokenKind::Star
}
}
'/' => {
if self.peek() == Some('=') {
self.advance();
TokenKind::SlashEq
} else {
TokenKind::Slash
}
}
'%' => {
if self.peek() == Some('=') {
self.advance();
TokenKind::PercentEq
} else {
TokenKind::Percent
}
}
'&' => {
if self.peek() == Some('=') {
self.advance();
TokenKind::AmpEq
} else {
TokenKind::Amp
}
}
'|' => {
if self.peek() == Some('=') {
self.advance();
TokenKind::PipeEq
} else {
TokenKind::Pipe
}
}
'^' => {
if self.peek() == Some('=') {
self.advance();
TokenKind::CaretEq
} else {
TokenKind::Caret
}
}
'~' => TokenKind::Tilde, '~' => TokenKind::Tilde,
'.' => TokenKind::Dot, '.' => TokenKind::Dot,
'(' => TokenKind::LParen, '(' => TokenKind::LParen,
@@ -292,6 +341,9 @@ impl<'src> Lexer<'src> {
if self.peek() == Some('>') { if self.peek() == Some('>') {
self.advance(); self.advance();
TokenKind::Arrow TokenKind::Arrow
} else if self.peek() == Some('=') {
self.advance();
TokenKind::MinusEq
} else { } else {
TokenKind::Minus TokenKind::Minus
} }
@@ -313,7 +365,13 @@ impl<'src> Lexer<'src> {
} }
} }
'<' => { '<' => {
if self.peek() == Some('=') { if self.at_ascii2(b'<', b'=') {
self.pos += 2;
TokenKind::ShlEq
} else if self.peek() == Some('<') {
self.advance();
TokenKind::Shl
} else if self.peek() == Some('=') {
self.advance(); self.advance();
TokenKind::LtEq TokenKind::LtEq
} else { } else {
@@ -321,7 +379,13 @@ impl<'src> Lexer<'src> {
} }
} }
'>' => { '>' => {
if self.peek() == Some('=') { if self.at_ascii2(b'>', b'=') {
self.pos += 2;
TokenKind::ShrEq
} else if self.peek() == Some('>') {
self.advance();
TokenKind::Shr
} else if self.peek() == Some('=') {
self.advance(); self.advance();
TokenKind::GtEq TokenKind::GtEq
} else { } else {
@@ -518,6 +582,31 @@ mod tests {
); );
} }
/// Every compound-assignment lexeme must come out of the lexer as its own
/// dedicated token kind, in source order, followed by `Eof`.
#[test]
fn compound_assign_operators() {
    let expected = vec![
        PlusEq, MinusEq, StarEq, SlashEq, PercentEq, AmpEq, PipeEq, CaretEq, ShlEq, ShrEq, Eof,
    ];
    assert_eq!(kinds("+= -= *= /= %= &= |= ^= <<= >>="), expected);
}
/// The plain shifts and their assigning forms are four distinct tokens.
#[test]
fn shift_operators() {
    let lexed = kinds("<< >> <<= >>=");
    assert_eq!(lexed, vec![Shl, Shr, ShlEq, ShrEq, Eof]);
}
#[test]
fn shift_does_not_steal_comparison() {
    // The lexer now looks ahead after `<` / `>` to recognise shift tokens.
    // A lone `<` or `>` followed by whitespace must still lex as Lt / Gt
    // rather than being folded into a shift or shift-assign token.
    let src = "a < b > c";
    assert_eq!(kinds(src), vec![Ident, Lt, Ident, Gt, Ident, Eof]);
}
#[test] #[test]
fn punctuation() { fn punctuation() {
assert_eq!( assert_eq!(

View File

@@ -2,8 +2,9 @@ use std::fmt;
use crate::{ use crate::{
ast::{ ast::{
BinaryOp, Block, ElseBranch, Expr, ExprKind, FieldDef, FuncDef, Param, Program, Stmt, BinaryOp, Block, CompoundAssignOp, ElseBranch, Expr, ExprKind, FieldDef, FuncDef, Param,
StmtKind, StructDef, StructField, TopLevelDef, TopLevelDefKind, Type, UnaryOp, Program, Stmt, StmtKind, StructDef, StructField, TopLevelDef, TopLevelDefKind, Type,
UnaryOp,
}, },
lexer::Lexer, lexer::Lexer,
token::{Span, Token, TokenKind}, token::{Span, Token, TokenKind},
@@ -35,9 +36,18 @@ impl fmt::Display for ParseError {
fn infix_bp(kind: TokenKind) -> Option<(u8, u8)> { fn infix_bp(kind: TokenKind) -> Option<(u8, u8)> {
let bp = match kind { let bp = match kind {
// Assignment: lowest precedence, right-associative (left_bp == right_bp). // Assignment and compound assignment: lowest precedence, right-associative.
// `a = b = c` → `a = (b = c)`. TokenKind::Eq
TokenKind::Eq => (2, 2), | TokenKind::PlusEq
| TokenKind::MinusEq
| TokenKind::StarEq
| TokenKind::SlashEq
| TokenKind::PercentEq
| TokenKind::AmpEq
| TokenKind::PipeEq
| TokenKind::CaretEq
| TokenKind::ShlEq
| TokenKind::ShrEq => (2, 2),
TokenKind::Or => (10, 11), TokenKind::Or => (10, 11),
TokenKind::And => (20, 21), TokenKind::And => (20, 21),
TokenKind::Pipe => (30, 31), TokenKind::Pipe => (30, 31),
@@ -50,6 +60,7 @@ fn infix_bp(kind: TokenKind) -> Option<(u8, u8)> {
| TokenKind::LtEq | TokenKind::LtEq
| TokenKind::GtEq => (55, 56), | TokenKind::GtEq => (55, 56),
TokenKind::Plus | TokenKind::Minus => (60, 61), TokenKind::Plus | TokenKind::Minus => (60, 61),
TokenKind::Shl | TokenKind::Shr => (65, 66),
TokenKind::Star | TokenKind::Slash | TokenKind::Percent => (70, 71), TokenKind::Star | TokenKind::Slash | TokenKind::Percent => (70, 71),
// Postfix: `.`, `[`, `(` — handled separately in parse_led, bp listed // Postfix: `.`, `[`, `(` — handled separately in parse_led, bp listed
// here only so callers can detect them as infix/postfix operators. // here only so callers can detect them as infix/postfix operators.
@@ -82,6 +93,22 @@ fn token_to_unary_op(kind: TokenKind) -> UnaryOp {
} }
} }
/// Maps a compound-assignment token to the operator it denotes.
///
/// Returns `None` for every token kind that is not one of the ten `op=`
/// tokens, so callers can also use this as a "is this a compound
/// assignment?" probe.
fn token_to_compound_assign_op(kind: TokenKind) -> Option<CompoundAssignOp> {
    let op = match kind {
        // Arithmetic
        TokenKind::PlusEq => CompoundAssignOp::Add,
        TokenKind::MinusEq => CompoundAssignOp::Sub,
        TokenKind::StarEq => CompoundAssignOp::Mul,
        TokenKind::SlashEq => CompoundAssignOp::Div,
        TokenKind::PercentEq => CompoundAssignOp::Rem,
        // Bitwise
        TokenKind::AmpEq => CompoundAssignOp::BitAnd,
        TokenKind::PipeEq => CompoundAssignOp::BitOr,
        TokenKind::CaretEq => CompoundAssignOp::BitXor,
        // Shift
        TokenKind::ShlEq => CompoundAssignOp::Shl,
        TokenKind::ShrEq => CompoundAssignOp::Shr,
        // Anything else is not a compound-assignment token.
        _ => return None,
    };
    Some(op)
}
fn token_to_binary_op(kind: TokenKind) -> BinaryOp { fn token_to_binary_op(kind: TokenKind) -> BinaryOp {
match kind { match kind {
TokenKind::Or => BinaryOp::Or, TokenKind::Or => BinaryOp::Or,
@@ -100,6 +127,8 @@ fn token_to_binary_op(kind: TokenKind) -> BinaryOp {
TokenKind::Star => BinaryOp::Mul, TokenKind::Star => BinaryOp::Mul,
TokenKind::Slash => BinaryOp::Div, TokenKind::Slash => BinaryOp::Div,
TokenKind::Percent => BinaryOp::Rem, TokenKind::Percent => BinaryOp::Rem,
TokenKind::Shl => BinaryOp::Shl,
TokenKind::Shr => BinaryOp::Shr,
TokenKind::Eq => BinaryOp::Assign, TokenKind::Eq => BinaryOp::Assign,
_ => unreachable!("not a binary op: {:?}", kind), _ => unreachable!("not a binary op: {:?}", kind),
} }
@@ -588,6 +617,22 @@ impl<'src> Parser<'src> {
) )
} }
// Compound assignment: `lhs op= rhs`
kind if token_to_compound_assign_op(kind).is_some() => {
let op = token_to_compound_assign_op(kind).unwrap();
let rhs = self.pratt(r_bp, allow_struct_lit);
let span = lhs.span.cover(rhs.span);
Expr::new(
ExprKind::CompoundAssign {
op,
op_span: op_tok.span,
lhs: Box::new(lhs),
rhs: Box::new(rhs),
},
span,
)
}
// Binary operator // Binary operator
kind => { kind => {
let op = token_to_binary_op(kind); let op = token_to_binary_op(kind);

View File

@@ -85,6 +85,10 @@ define_tokens! {
Bang => "`!`", Bang => "`!`",
Tilde => "`~`", Tilde => "`~`",
// ── Shift operators ───────────────────────────────────────────────────────
Shl => "`<<`",
Shr => "`>>`",
// ── Comparison operators ────────────────────────────────────────────────── // ── Comparison operators ──────────────────────────────────────────────────
EqEq => "`==`", EqEq => "`==`",
BangEq => "`!=`", BangEq => "`!=`",
@@ -95,6 +99,16 @@ define_tokens! {
// ── Assignment ──────────────────────────────────────────────────────────── // ── Assignment ────────────────────────────────────────────────────────────
Eq => "`=`", Eq => "`=`",
PlusEq => "`+=`",
MinusEq => "`-=`",
StarEq => "`*=`",
SlashEq => "`/=`",
PercentEq => "`%=`",
AmpEq => "`&=`",
PipeEq => "`|=`",
CaretEq => "`^=`",
ShlEq => "`<<=`",
ShrEq => "`>>=`",
// ── Punctuation ─────────────────────────────────────────────────────────── // ── Punctuation ───────────────────────────────────────────────────────────
Arrow => "`->`", Arrow => "`->`",