From a82b7e46332451389278949da8c5b93b03ca569f Mon Sep 17 00:00:00 2001 From: Jooris Hadeler Date: Tue, 10 Mar 2026 18:29:52 +0100 Subject: [PATCH] Feat: add compound assignment and shift operators Compound assignment: +=, -=, *=, /=, %=, &=, |=, ^=, <<=, >>= Shift: <<, >> Each compound assignment token parses at the same precedence as `=` (right-associative, lowest) and produces ExprKind::CompoundAssign. Shifts parse between additive and multiplicative precedence. GRAMMAR.ebnf and SYNTAX.md updated accordingly. --- GRAMMAR.ebnf | 44 ++++++++++++------ SYNTAX.md | 79 +++++++++++++++++++++----------- fluxc/src/ast.rs | 24 ++++++++++ fluxc/src/lexer.rs | 109 ++++++++++++++++++++++++++++++++++++++++---- fluxc/src/parser.rs | 55 ++++++++++++++++++++-- fluxc/src/token.rs | 14 ++++++ 6 files changed, 269 insertions(+), 56 deletions(-) diff --git a/GRAMMAR.ebnf b/GRAMMAR.ebnf index 8e07521..1af3291 100644 --- a/GRAMMAR.ebnf +++ b/GRAMMAR.ebnf @@ -36,20 +36,27 @@ top_level_def = func_def expr = assign_expr ; -(* --- Assignment (lowest-precedence binary operator) --- *) +(* --- Assignment and compound assignment (lowest precedence) --- *) (* *) -(* Uses token `=`; right-associative via recursion. *) -(* The optional form encodes at-most-one assignment target: chains *) -(* like `a = b = c` parse as `a = (b = c)` thanks to right *) -(* recursion. *) +(* assign_op covers `=` and all compound-assignment operators. *) +(* All have the same precedence and are right-associative: *) +(* `a = b = c` → `a = (b = c)` *) +(* `a += b += c` → `a += (b += c)` (unusual but syntactically *) +(* valid; semantics checked later) *) +(* *) +(* Compound assignments expand semantically: *) +(* `x += y` → `x = x + y` *) +(* `x -= y` → `x = x - y` etc. *) (* *) (* LL(1): after or_expr, peek at next token. 
*) -(* "=" → consume and recurse into assign_expr *) +(* assign_op token → consume and recurse into assign_expr *) (* other → return the or_expr as-is *) -(* "=" is not in FIRST(stmt), so expr_stmt can still be *) -(* distinguished from other statement kinds. *) +(* None of the assign_op tokens are in FIRST(stmt), so expr_stmt *) +(* remains unambiguous. *) -assign_expr = or_expr , [ "=" , assign_expr ] ; +assign_expr = or_expr , [ assign_op , assign_expr ] ; + +assign_op = "=" | "+=" | "-=" | "*=" | "/=" | "%=" | "&=" | "|=" | "^=" | "<<=" | ">>=" ; (* --- Logical OR (lowest-precedence binary operator) --- *) @@ -83,8 +90,14 @@ bitand_expr = additive_expr , { "&" , additive_expr } ; (* --- Additive: addition and subtraction --- *) -additive_expr = multiplicative_expr , - { ( "+" | "-" ) , multiplicative_expr } ; +additive_expr = shift_expr , + { ( "+" | "-" ) , shift_expr } ; + + +(* --- Shift: left shift and right shift --- *) + +shift_expr = multiplicative_expr , + { ( "<<" | ">>" ) , multiplicative_expr } ; (* --- Multiplicative: multiplication, division, modulo --- *) @@ -183,7 +196,7 @@ arg_list = [ expr , { "," , expr } ] ; expr_ns = assign_expr_ns ; -assign_expr_ns = or_expr_ns , [ "=" , assign_expr_ns ] ; +assign_expr_ns = or_expr_ns , [ assign_op , assign_expr_ns ] ; or_expr_ns = and_expr_ns , { "or" , and_expr_ns } ; and_expr_ns = bitor_expr_ns , { "and" , bitor_expr_ns } ; @@ -192,8 +205,11 @@ bitor_expr_ns = bitxor_expr_ns , { "|" , bitxor_expr_ns } ; bitxor_expr_ns = bitand_expr_ns , { "^" , bitand_expr_ns } ; bitand_expr_ns = additive_expr_ns , { "&" , additive_expr_ns } ; -additive_expr_ns = multiplicative_expr_ns , - { ( "+" | "-" ) , multiplicative_expr_ns } ; +additive_expr_ns = shift_expr_ns , + { ( "+" | "-" ) , shift_expr_ns } ; + +shift_expr_ns = multiplicative_expr_ns , + { ( "<<" | ">>" ) , multiplicative_expr_ns } ; multiplicative_expr_ns = unary_expr_ns , { ( "*" | "/" | "%" ) , unary_expr_ns } ; diff --git a/SYNTAX.md b/SYNTAX.md index 
03321f3..9fa1cee 100644 --- a/SYNTAX.md +++ b/SYNTAX.md @@ -24,20 +24,32 @@ appear as UPPERCASE terminals in `GRAMMAR.ebnf`. ### Operator Tokens -| Token | Lexeme | Description | -| --------- | ------ | -------------------------------------- | -| `PLUS` | `+` | Addition / unary plus (not in grammar) | -| `MINUS` | `-` | Subtraction / unary negation | -| `STAR` | `*` | Multiplication / pointer dereference | -| `SLASH` | `/` | Division | -| `PERCENT` | `%` | Modulo (remainder) | -| `AMP` | `&` | Bitwise AND / address-of | -| `PIPE` | `\|` | Bitwise OR | -| `CARET` | `^` | Bitwise XOR | -| `BANG` | `!` | Logical NOT | -| `TILDE` | `~` | Bitwise NOT | -| `DOT` | `.` | Member access | -| `EQ` | `=` | Assignment | +| Token | Lexeme | Description | +| ------------ | ------ | -------------------------------------- | +| `PLUS` | `+` | Addition / unary plus (not in grammar) | +| `MINUS` | `-` | Subtraction / unary negation | +| `STAR` | `*` | Multiplication / pointer dereference | +| `SLASH` | `/` | Division | +| `PERCENT` | `%` | Modulo (remainder) | +| `AMP` | `&` | Bitwise AND / address-of | +| `PIPE` | `\|` | Bitwise OR | +| `CARET` | `^` | Bitwise XOR | +| `BANG` | `!` | Logical NOT | +| `TILDE` | `~` | Bitwise NOT | +| `DOT` | `.` | Member access | +| `SHL` | `<<` | Left shift | +| `SHR` | `>>` | Right shift | +| `EQ` | `=` | Assignment | +| `PLUS_EQ` | `+=` | Add-assign | +| `MINUS_EQ` | `-=` | Subtract-assign | +| `STAR_EQ` | `*=` | Multiply-assign | +| `SLASH_EQ` | `/=` | Divide-assign | +| `PERCENT_EQ` | `%=` | Modulo-assign | +| `AMP_EQ` | `&=` | Bitwise-AND-assign | +| `PIPE_EQ` | `\|=` | Bitwise-OR-assign | +| `CARET_EQ` | `^=` | Bitwise-XOR-assign | +| `SHL_EQ` | `<<=` | Left-shift-assign | +| `SHR_EQ` | `>>=` | Right-shift-assign | ### Keyword Tokens @@ -127,19 +139,20 @@ tightly). 
### Operator Precedence Table -| Level | Operators | Associativity | Description | -| ----- | --------------------------- | -------------- | -------------------------------- | -| 1 | `=` | right | Assignment (lowest) | -| 2 | `or` | left | Logical OR | -| 3 | `and` | left | Logical AND | -| 4 | `\|` | left | Bitwise OR | -| 5 | `^` | left | Bitwise XOR | -| 6 | `&` | left | Bitwise AND | -| 7 | `+` `-` | left | Addition, subtraction | -| 8 | `*` `/` `%` | left | Multiplication, division, modulo | -| 9 | `!` `~` `-` `*` `&` | right (unary) | Prefix unary operators | -| 10 | `.` `[…]` `(…)` | left (postfix) | Member access, index, call | -| 11 | literals, identifiers, `()` | — | Primary expressions (highest) | +| Level | Operators | Associativity | Description | +| ----- | -------------------------------------------------------- | -------------- | -------------------------------- | +| 1 | `=` `+=` `-=` `*=` `/=` `%=` `&=` `\|=` `^=` `<<=` `>>=` | right | Assignment (lowest) | +| 2 | `or` | left | Logical OR | +| 3 | `and` | left | Logical AND | +| 4 | `\|` | left | Bitwise OR | +| 5 | `^` | left | Bitwise XOR | +| 6 | `&` | left | Bitwise AND | +| 7 | `+` `-` | left | Addition, subtraction | +| 8 | `<<` `>>` | left | Bit shift | +| 9 | `*` `/` `%` | left | Multiplication, division, modulo | +| 10 | `!` `~` `-` `*` `&` | right (unary) | Prefix unary operators | +| 11 | `.` `[…]` `(…)` | left (postfix) | Member access, index, call | +| 12 | literals, identifiers, `()` | — | Primary expressions (highest) | ### Operator Descriptions @@ -148,6 +161,18 @@ tightly). 
| Operator | Name | Example | Notes | | -------- | -------------- | --------- | ---------------------------------------------- | | `=` | Assignment | `a = b` | Right-associative; `a = b = c` → `a = (b = c)` | +| `+=` | Add-assign | `a += b` | Expands to `a = a + b` | +| `-=` | Sub-assign | `a -= b` | Expands to `a = a - b` | +| `*=` | Mul-assign | `a *= b` | Expands to `a = a * b` | +| `/=` | Div-assign | `a /= b` | Expands to `a = a / b` | +| `%=` | Rem-assign | `a %= b` | Expands to `a = a % b` | +| `&=` | BitAnd-assign | `a &= b` | Expands to `a = a & b` | +| `\|=` | BitOr-assign | `a \|= b` | Expands to `a = a \| b` | +| `^=` | BitXor-assign | `a ^= b` | Expands to `a = a ^ b` | +| `<<` | Left shift | `a << b` | Shift `a` left by `b` bits; integer types | +| `>>` | Right shift | `a >> b` | Shift `a` right by `b` bits; integer types | +| `<<=` | Shl-assign | `a <<= b` | Expands to `a = a << b` | +| `>>=` | Shr-assign | `a >>= b` | Expands to `a = a >> b` | | `or` | Logical OR | `a or b` | Short-circuits; both operands must be `bool` | | `and` | Logical AND | `a and b` | Short-circuits; both operands must be `bool` | | `\|` | Bitwise OR | `a \| b` | Integer types | diff --git a/fluxc/src/ast.rs b/fluxc/src/ast.rs index 9a3a472..666e236 100644 --- a/fluxc/src/ast.rs +++ b/fluxc/src/ast.rs @@ -11,6 +11,20 @@ pub enum UnaryOp { AddrOf, // `&` } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CompoundAssignOp { + Add, // `+=` + Sub, // `-=` + Mul, // `*=` + Div, // `/=` + Rem, // `%=` + BitAnd, // `&=` + BitOr, // `|=` + BitXor, // `^=` + Shl, // `<<=` + Shr, // `>>=` +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum BinaryOp { // Logical @@ -33,6 +47,9 @@ pub enum BinaryOp { Mul, // `*` Div, // `/` Rem, // `%` + // Shift + Shl, // `<<` + Shr, // `>>` // Assignment (lowest precedence, right-associative) Assign, // `=` } @@ -123,6 +140,13 @@ pub enum ExprKind { lhs: Box<Expr>, rhs: Box<Expr>, }, + // Compound assignment: `lhs op= rhs` (expands to `lhs = lhs op 
rhs`) + CompoundAssign { + op: CompoundAssignOp, + op_span: Span, + lhs: Box<Expr>, + rhs: Box<Expr>, + }, // Postfix Field { diff --git a/fluxc/src/lexer.rs b/fluxc/src/lexer.rs index 1aae1f9..d4f8b2f 100644 --- a/fluxc/src/lexer.rs +++ b/fluxc/src/lexer.rs @@ -267,14 +267,63 @@ impl<'src> Lexer<'src> { self.advance(); let kind = match c { - // ── Unambiguous single-character tokens ────────────────────────── - '+' => TokenKind::Plus, - '*' => TokenKind::Star, - '/' => TokenKind::Slash, - '%' => TokenKind::Percent, - '&' => TokenKind::Amp, - '|' => TokenKind::Pipe, - '^' => TokenKind::Caret, + // ── Tokens that may be the prefix of a compound-assignment ─────── + '+' => { + if self.peek() == Some('=') { + self.advance(); + TokenKind::PlusEq + } else { + TokenKind::Plus + } + } + '*' => { + if self.peek() == Some('=') { + self.advance(); + TokenKind::StarEq + } else { + TokenKind::Star + } + } + '/' => { + if self.peek() == Some('=') { + self.advance(); + TokenKind::SlashEq + } else { + TokenKind::Slash + } + } + '%' => { + if self.peek() == Some('=') { + self.advance(); + TokenKind::PercentEq + } else { + TokenKind::Percent + } + } + '&' => { + if self.peek() == Some('=') { + self.advance(); + TokenKind::AmpEq + } else { + TokenKind::Amp + } + } + '|' => { + if self.peek() == Some('=') { + self.advance(); + TokenKind::PipeEq + } else { + TokenKind::Pipe + } + } + '^' => { + if self.peek() == Some('=') { + self.advance(); + TokenKind::CaretEq + } else { + TokenKind::Caret + } + } '~' => TokenKind::Tilde, '.' 
=> TokenKind::Dot, '(' => TokenKind::LParen, @@ -292,6 +341,9 @@ impl<'src> Lexer<'src> { if self.peek() == Some('>') { self.advance(); TokenKind::Arrow + } else if self.peek() == Some('=') { + self.advance(); + TokenKind::MinusEq } else { TokenKind::Minus } @@ -313,7 +365,13 @@ impl<'src> Lexer<'src> { } } '<' => { - if self.peek() == Some('=') { + if self.at_ascii2(b'<', b'=') { + self.pos += 2; + TokenKind::ShlEq + } else if self.peek() == Some('<') { + self.advance(); + TokenKind::Shl + } else if self.peek() == Some('=') { self.advance(); TokenKind::LtEq } else { @@ -321,7 +379,13 @@ impl<'src> Lexer<'src> { } } '>' => { - if self.peek() == Some('=') { + if self.at_ascii2(b'>', b'=') { + self.pos += 2; + TokenKind::ShrEq + } else if self.peek() == Some('>') { + self.advance(); + TokenKind::Shr + } else if self.peek() == Some('=') { self.advance(); TokenKind::GtEq } else { @@ -518,6 +582,31 @@ mod tests { ); } + #[test] + fn compound_assign_operators() { + let src = "+= -= *= /= %= &= |= ^= <<= >>="; + assert_eq!( + kinds(src), + vec![ + PlusEq, MinusEq, StarEq, SlashEq, PercentEq, AmpEq, PipeEq, CaretEq, ShlEq, + ShrEq, Eof + ] + ); + } + + #[test] + fn shift_operators() { + let src = "<< >> <<= >>="; + assert_eq!(kinds(src), vec![Shl, Shr, ShlEq, ShrEq, Eof]); + } + + #[test] + fn shift_does_not_steal_comparison() { + // `<` and `>` separated by other tokens must stay Lt / Gt, not merge into Shl / Shr + let src = "a < b > c"; + assert_eq!(kinds(src), vec![Ident, Lt, Ident, Gt, Ident, Eof]); + } + #[test] fn punctuation() { assert_eq!( diff --git a/fluxc/src/parser.rs b/fluxc/src/parser.rs index 82a8922..916989b 100644 --- a/fluxc/src/parser.rs +++ b/fluxc/src/parser.rs @@ -2,8 +2,9 @@ use std::fmt; use crate::{ ast::{ - BinaryOp, Block, ElseBranch, Expr, ExprKind, FieldDef, FuncDef, Param, Program, Stmt, - StmtKind, StructDef, StructField, TopLevelDef, TopLevelDefKind, Type, UnaryOp, + BinaryOp, Block, CompoundAssignOp, ElseBranch, Expr, ExprKind, FieldDef, FuncDef, Param, + Program, Stmt, 
StmtKind, StructDef, StructField, TopLevelDef, TopLevelDefKind, Type, + UnaryOp, }, lexer::Lexer, token::{Span, Token, TokenKind}, @@ -35,9 +36,18 @@ impl fmt::Display for ParseError { fn infix_bp(kind: TokenKind) -> Option<(u8, u8)> { let bp = match kind { - // Assignment: lowest precedence, right-associative (left_bp == right_bp). - // `a = b = c` → `a = (b = c)`. - TokenKind::Eq => (2, 2), + // Assignment and compound assignment: lowest precedence, right-associative. + TokenKind::Eq + | TokenKind::PlusEq + | TokenKind::MinusEq + | TokenKind::StarEq + | TokenKind::SlashEq + | TokenKind::PercentEq + | TokenKind::AmpEq + | TokenKind::PipeEq + | TokenKind::CaretEq + | TokenKind::ShlEq + | TokenKind::ShrEq => (2, 2), TokenKind::Or => (10, 11), TokenKind::And => (20, 21), TokenKind::Pipe => (30, 31), @@ -50,6 +60,7 @@ fn infix_bp(kind: TokenKind) -> Option<(u8, u8)> { | TokenKind::LtEq | TokenKind::GtEq => (55, 56), TokenKind::Plus | TokenKind::Minus => (60, 61), + TokenKind::Shl | TokenKind::Shr => (65, 66), TokenKind::Star | TokenKind::Slash | TokenKind::Percent => (70, 71), // Postfix: `.`, `[`, `(` — handled separately in parse_led, bp listed // here only so callers can detect them as infix/postfix operators. 
@@ -82,6 +93,22 @@ fn token_to_unary_op(kind: TokenKind) -> UnaryOp { } } +fn token_to_compound_assign_op(kind: TokenKind) -> Option<CompoundAssignOp> { + match kind { + TokenKind::PlusEq => Some(CompoundAssignOp::Add), + TokenKind::MinusEq => Some(CompoundAssignOp::Sub), + TokenKind::StarEq => Some(CompoundAssignOp::Mul), + TokenKind::SlashEq => Some(CompoundAssignOp::Div), + TokenKind::PercentEq => Some(CompoundAssignOp::Rem), + TokenKind::AmpEq => Some(CompoundAssignOp::BitAnd), + TokenKind::PipeEq => Some(CompoundAssignOp::BitOr), + TokenKind::CaretEq => Some(CompoundAssignOp::BitXor), + TokenKind::ShlEq => Some(CompoundAssignOp::Shl), + TokenKind::ShrEq => Some(CompoundAssignOp::Shr), + _ => None, + } +} + fn token_to_binary_op(kind: TokenKind) -> BinaryOp { match kind { TokenKind::Or => BinaryOp::Or, @@ -100,6 +127,8 @@ fn token_to_binary_op(kind: TokenKind) -> BinaryOp { TokenKind::Star => BinaryOp::Mul, TokenKind::Slash => BinaryOp::Div, TokenKind::Percent => BinaryOp::Rem, + TokenKind::Shl => BinaryOp::Shl, + TokenKind::Shr => BinaryOp::Shr, TokenKind::Eq => BinaryOp::Assign, _ => unreachable!("not a binary op: {:?}", kind), } @@ -588,6 +617,22 @@ impl<'src> Parser<'src> { ) } + // Compound assignment: `lhs op= rhs` + kind if token_to_compound_assign_op(kind).is_some() => { + let op = token_to_compound_assign_op(kind).unwrap(); + let rhs = self.pratt(r_bp, allow_struct_lit); + let span = lhs.span.cover(rhs.span); + Expr::new( + ExprKind::CompoundAssign { + op, + op_span: op_tok.span, + lhs: Box::new(lhs), + rhs: Box::new(rhs), + }, + span, + ) + } + // Binary operator kind => { let op = token_to_binary_op(kind); diff --git a/fluxc/src/token.rs b/fluxc/src/token.rs index d89b16f..9a88551 100644 --- a/fluxc/src/token.rs +++ b/fluxc/src/token.rs @@ -85,6 +85,10 @@ define_tokens! 
{ Bang => "`!`", Tilde => "`~`", + // ── Shift operators ─────────────────────────────────────────────────────── + Shl => "`<<`", + Shr => "`>>`", + // ── Comparison operators ────────────────────────────────────────────────── EqEq => "`==`", BangEq => "`!=`", @@ -95,6 +99,16 @@ define_tokens! { // ── Assignment ──────────────────────────────────────────────────────────── Eq => "`=`", + PlusEq => "`+=`", + MinusEq => "`-=`", + StarEq => "`*=`", + SlashEq => "`/=`", + PercentEq => "`%=`", + AmpEq => "`&=`", + PipeEq => "`|=`", + CaretEq => "`^=`", + ShlEq => "`<<=`", + ShrEq => "`>>=`", // ── Punctuation ─────────────────────────────────────────────────────────── Arrow => "`->`",