Files
flux/GRAMMAR.ebnf
Jooris Hadeler a82b7e4633 Feat: add compound assignment and shift operators
Compound assignment: +=, -=, *=, /=, %=, &=, |=, ^=, <<=, >>=
Shift: <<, >>

Each compound assignment token parses at the same precedence as `=`
(right-associative, lowest) and produces ExprKind::CompoundAssign.
Shifts parse between additive and multiplicative precedence.
GRAMMAR.ebnf and SYNTAX.md updated accordingly.
2026-03-10 18:29:52 +01:00

470 lines
21 KiB
EBNF

(* Flux Language Grammar — Context-Free LL(1) Grammar *)
(* ================================================================ *)
(* *)
(* Notation (ISO/IEC 14977 EBNF): *)
(* rule = definition ; defines a rule (terminated by ;) *)
(* a , b concatenation *)
(* a | b alternation *)
(* { a } zero or more repetitions of a *)
(* [ a ] optional a (zero or one) *)
(* ( a | b ) grouping *)
(* "literal" terminal string *)
(* *)
(* UPPERCASE identifiers are lexical token classes whose value *)
(* cannot be expressed as a single literal (e.g. IDENT, INT_LIT). *)
(* They are NOT defined here — see SYNTAX.md. *)
(* *)
(* Unique/fixed tokens are written as quoted literals directly. *)
(* *)
(* Lowercase identifiers are non-terminals (grammar productions). *)
(* ================================================================ *)
(* Program (start symbol) *)
(* ================================================================ *)
(* Start symbol: any number of top-level definitions, including none. *)
program = { top_level_def } ;
(* A top-level definition is either a function or a struct. *)
top_level_def = func_def | struct_def ;
(* ================================================================ *)
(* Expressions *)
(* ================================================================ *)
(* Entry point of the full expression hierarchy. *)
expr = assign_expr ;
(* --- Assignment and compound assignment (lowest precedence) --- *)
(* *)
(* assign_op covers `=` and all compound-assignment operators. *)
(* All have the same precedence and are right-associative: *)
(* `a = b = c` ≡ `a = (b = c)` *)
(* `a += b += c` ≡ `a += (b += c)` (unusual but syntactically *)
(* valid; semantics checked later) *)
(* *)
(* Compound assignments expand semantically: *)
(* `x += y` → `x = x + y` *)
(* `x -= y` → `x = x - y` etc. *)
(* *)
(* The left operand is any or_expr: the grammar itself does not *)
(* restrict it to an assignable place. *)
(* *)
(* LL(1): after or_expr, peek at next token. *)
(* assign_op token → consume and recurse into assign_expr *)
(* other → return the or_expr as-is *)
(* None of the assign_op tokens are in FIRST(stmt), so expr_stmt *)
(* remains unambiguous. *)
assign_expr = or_expr , [ assign_op , assign_expr ] ;
assign_op = "=" | "+=" | "-=" | "*=" | "/=" | "%=" | "&=" | "|=" | "^=" | "<<=" | ">>=" ;
(* --- Logical OR (lowest-precedence binary operator) --- *)
(* *)
(* Uses keyword `or`; left-associative via iteration: *)
(* `a or b or c` ≡ `(a or b) or c` *)
or_expr = and_expr , { "or" , and_expr } ;
(* --- Logical AND --- *)
(* *)
(* Uses keyword `and`; left-associative via iteration. *)
(* Binds tighter than `or`: *)
(* `a or b and c` ≡ `a or (b and c)` *)
and_expr = bitor_expr , { "and" , bitor_expr } ;
(* --- Bitwise OR --- *)
(* *)
(* Loosest of the three bitwise binary operators; left-associative *)
(* via iteration. *)
bitor_expr = bitxor_expr , { "|" , bitxor_expr } ;
(* --- Bitwise XOR --- *)
(* *)
(* Binds tighter than "|" and looser than "&". *)
bitxor_expr = bitand_expr , { "^" , bitand_expr } ;
(* --- Bitwise AND --- *)
(* *)
(* Operands are additive_expr, so "&" binds looser than every *)
(* arithmetic and shift operator: *)
(* `a & b + c` ≡ `a & (b + c)` *)
bitand_expr = additive_expr , { "&" , additive_expr } ;
(* --- Additive: addition and subtraction --- *)
(* *)
(* Operands are shift_expr, so "+" and "-" bind looser than the *)
(* shift operators: *)
(* `a + b << c` ≡ `a + (b << c)` *)
(* Left-associative via iteration. *)
additive_expr = shift_expr ,
{ ( "+" | "-" ) , shift_expr } ;
(* --- Shift: left shift and right shift --- *)
(* *)
(* Sits between additive and multiplicative precedence: *)
(* `a << b * c` ≡ `a << (b * c)` *)
(* Left-associative via iteration. *)
shift_expr = multiplicative_expr ,
{ ( "<<" | ">>" ) , multiplicative_expr } ;
(* --- Multiplicative: multiplication, division, modulo --- *)
(* *)
(* Tightest-binding binary operators; left-associative via *)
(* iteration. *)
multiplicative_expr = unary_expr ,
{ ( "*" | "/" | "%" ) , unary_expr } ;
(* --- Unary operators (prefix) --- *)
(* *)
(* Prefix operators nest through recursion, so a chain applies *)
(* right-to-left: `-!x` ≡ `-(!x)`. *)
(* *)
(* "!" logical not *)
(* "~" bitwise not *)
(* "-" arithmetic negation *)
(* "*" dereference (pointer indirection) *)
(* "&" address-of *)
(* *)
(* LL(1): one of the five operator tokens → consume and recurse; *)
(* any other token → postfix_expr. *)
unary_expr = ( "!" | "~" | "-" | "*" | "&" ) , unary_expr
| postfix_expr ;
(* --- Postfix operators (left-associative via iteration) --- *)
(* *)
(* Postfix operators bind tighter than any prefix or binary form: *)
(* `-a.b` ≡ `-(a.b)` *)
(* Multiple postfix operations chain left-to-right: *)
(* `a.b[i](x)` → member access, then index, then call *)
(* *)
(* LL(1): the three postfix_op alternatives start with the distinct *)
(* tokens ".", "[" and "(". *)
postfix_expr = primary_expr , { postfix_op } ;
postfix_op = "." , IDENT (* member access *)
| "[" , expr , "]" (* subscript/index *)
| "(" , arg_list , ")" ; (* function call *)
(* --- Primary expressions (highest precedence) --- *)
(* *)
(* LL(1) note: after IDENT, peek at the next token. *)
(* "{" → parse struct_lit_body (struct literal) *)
(* other → bare identifier reference *)
(* All remaining alternatives start with a distinct token class or *)
(* literal, so a single token selects the alternative. *)
primary_expr = IDENT , [ struct_lit_body ] (* ident or struct lit *)
| INT_LIT
| FLOAT_LIT
| STRING_LIT
| CHAR_LIT
| "true"
| "false"
| "(" , expr , ")" ; (* parenthesised *)
(* --- Struct literal --- *)
(* *)
(* A struct literal constructs a value of a named struct type. *)
(* IDENT "{" field: expr, ... "}" *)
(* The leading IDENT is consumed by primary_expr; struct_lit_body *)
(* covers only the braced field list. *)
(* *)
(* Field order need not match the struct definition order. *)
(* No trailing comma is permitted (consistent with struct_def). *)
(* *)
(* LL(1) notes: *)
(* struct_field_list: "}" → ε; IDENT → first field *)
(* FIRST(struct_field) = {IDENT} *)
(* FOLLOW(struct_field_list) = {"}"} *)
(* Disjoint, so no look-ahead conflict. *)
struct_lit_body = "{" , struct_field_list , "}" ;
struct_field_list = [ struct_field , { "," , struct_field } ] ;
struct_field = IDENT , ":" , expr ;
(* ================================================================ *)
(* Argument List *)
(* ================================================================ *)
(* Zero or more comma-separated expressions. Every "," must be *)
(* followed by another expr, so a trailing comma is not accepted. *)
(* *)
(* LL(1): ")" → ε (empty list); otherwise parse the first expr. *)
arg_list = [ expr , { "," , expr } ] ;
(* ================================================================ *)
(* No-Struct Expression Hierarchy (expr_ns) *)
(* ================================================================ *)
(* *)
(* Struct literals create an LL(1) ambiguity in if/while conditions:*)
(* if Point { x: 1 } { ... } *)
(* After "Point", "{" could open a struct literal OR the body block.*)
(* *)
(* Solution: define expr_ns identical to expr except *)
(* primary_expr_ns disallows the struct_lit_body suffix after IDENT.*)
(* Struct literals ARE still allowed when parenthesised: *)
(* if (Point { x: 1 }).flag { ... } *)
(* *)
(* if_stmt and while_stmt use expr_ns for their condition. *)
(* All other expression positions use the full expr. *)
(* The rules below mirror the full expr hierarchy level for level, *)
(* with the same operators and the same nesting order; each rule *)
(* refers to the _ns variant of the next level. assign_op is shared *)
(* with the full hierarchy. *)
expr_ns = assign_expr_ns ;
assign_expr_ns = or_expr_ns , [ assign_op , assign_expr_ns ] ;
or_expr_ns = and_expr_ns , { "or" , and_expr_ns } ;
and_expr_ns = bitor_expr_ns , { "and" , bitor_expr_ns } ;
bitor_expr_ns = bitxor_expr_ns , { "|" , bitxor_expr_ns } ;
bitxor_expr_ns = bitand_expr_ns , { "^" , bitand_expr_ns } ;
bitand_expr_ns = additive_expr_ns , { "&" , additive_expr_ns } ;
additive_expr_ns = shift_expr_ns ,
{ ( "+" | "-" ) , shift_expr_ns } ;
shift_expr_ns = multiplicative_expr_ns ,
{ ( "<<" | ">>" ) , multiplicative_expr_ns } ;
multiplicative_expr_ns = unary_expr_ns ,
{ ( "*" | "/" | "%" ) , unary_expr_ns } ;
(* Prefix operators of the no-struct hierarchy: the same five *)
(* operators as unary_expr, recursing into the _ns variant. *)
unary_expr_ns = ( "!" | "~" | "-" | "*" | "&" ) , unary_expr_ns
| postfix_expr_ns ;
(* postfix_expr_ns reuses postfix_op from the full hierarchy, so *)
(* the expr inside "[" ... "]" and the arguments inside "(" ... ")" *)
(* may contain struct literals. *)
postfix_expr_ns = primary_expr_ns , { postfix_op } ;
(* primary_expr_ns: same as primary_expr but IDENT is never *)
(* followed by struct_lit_body. Note "(" , expr , ")" uses full *)
(* expr, so struct literals are permitted inside parentheses. *)
primary_expr_ns = IDENT (* bare ident only *)
| INT_LIT
| FLOAT_LIT
| STRING_LIT
| CHAR_LIT
| "true"
| "false"
| "(" , expr , ")" ; (* struct lit OK here *)
(* ================================================================ *)
(* Types *)
(* ================================================================ *)
(* A type is a primitive keyword, a named (user-defined) type, a *)
(* pointer, or a fixed-size array. *)
(* *)
(* LL(1): each alternative starts with a distinct token: *)
(* primitive-type keyword → primitive_type *)
(* IDENT → named_type *)
(* "*" → pointer_type *)
(* "[" → array_type *)
type = primitive_type | named_type | pointer_type | array_type ;
(* --- Primitive types --- *)
(* *)
(* Each primitive type is a fixed keyword token; the category of *)
(* each group is noted beside it. *)
primitive_type = "u8" | "u16" | "u32" | "u64" (* unsigned integers *)
| "i8" | "i16" | "i32" | "i64" (* signed integers *)
| "f32" | "f64" (* floating-point *)
| "bool" | "char" ; (* other *)
(* --- Named types --- *)
(* *)
(* A user-defined type referenced by its identifier (e.g. a struct *)
(* name). The lexer guarantees that all primitive-type keywords are *)
(* reserved, so IDENT never clashes with primitive_type. *)
(* The grammar accepts any IDENT here; whether it names a declared *)
(* type is presumably resolved in a later phase (TODO confirm). *)
named_type = IDENT ;
(* --- Pointer types --- *)
(* *)
(* "*" type — typed pointer; the pointee type is known. *)
(* "*opaque" — untyped/opaque pointer (no pointee type info). *)
(* *)
(* Examples: `*u8`, `*Point`, `*[i32; 4]`, `*opaque` *)
(* *)
(* LL(1) note: after "*", "opaque" is not in FIRST(type), so the *)
(* two alternatives are always distinguishable with one token. *)
pointer_type = "*" , ( "opaque" | type ) ;
(* --- Array types --- *)
(* *)
(* "[" type ";" INT_LIT "]" *)
(* *)
(* Examples: `[u8; 16]`, nested `[[f32; 3]; 3]` *)
(* *)
(* The element type and the fixed size (a non-negative integer *)
(* literal) are separated by ";". Sizes that are constant *)
(* expressions may be introduced in a later grammar revision. *)
array_type = "[" , type , ";" , INT_LIT , "]" ;
(* ================================================================ *)
(* Statements *)
(* ================================================================ *)
(* A statement is selected by its first token: the keyword-led *)
(* forms and "{" (block_stmt) are recognised directly, and any *)
(* other token starts an expr_stmt. *)
stmt = let_stmt | return_stmt
| if_stmt | while_stmt | loop_stmt
| break_stmt | continue_stmt
| block_stmt | expr_stmt ;
(* --- Return statement --- *)
(* *)
(* Exits the enclosing function, optionally yielding a value. *)
(* "return ;" is used when the function return type is (). *)
(* *)
(* LL(1): after "return", peek at next token. *)
(* ";" → no expression (unit return) *)
(* other → parse expr, then expect ";" *)
(* ";" is not in FIRST(expr), so the two cases are unambiguous. *)
return_stmt = "return" , [ expr ] , ";" ;
(* --- Expression statement --- *)
(* *)
(* Evaluates an expression for its side effects; the value is *)
(* discarded. The ";" is mandatory. *)
(* *)
(* LL(1): at stmt level, the first token selects the statement: *)
(* "let" → let_stmt *)
(* "return" → return_stmt *)
(* "if" → if_stmt *)
(* "while" → while_stmt *)
(* "loop" → loop_stmt *)
(* "break" → break_stmt *)
(* "continue" → continue_stmt *)
(* "{" → block_stmt *)
(* other → expr_stmt *)
expr_stmt = expr , ";" ;
(* --- If statement --- *)
(* *)
(* Conditionally executes a block. An optional "else" branch may *)
(* follow; it is either a plain block or another "if" statement, *)
(* enabling "else if" chains of arbitrary length. *)
(* *)
(* LL(1) notes: *)
(* condition uses expr_ns — struct literals are forbidden at the *)
(* outermost level to avoid ambiguity with the body block's "{". *)
(* [ "else" ... ] → consume "else" iff next token is "else" *)
(* else_branch: "if" → if_stmt (else-if); "{" → block_stmt *)
(* The two else_branch alternatives start with distinct tokens, *)
(* so no look-ahead conflict arises (no dangling-else ambiguity). *)
(* Both branches are block_stmt, so braces are always required. *)
if_stmt = "if" , expr_ns , block_stmt , [ "else" , else_branch ] ;
else_branch = if_stmt (* else if *)
| block_stmt ; (* plain else *)
(* --- While loop --- *)
(* *)
(* Repeatedly executes the body as long as the condition is true. *)
(* The condition is re-evaluated before every iteration. *)
(* If the condition is false on the first check, the body never *)
(* executes. *)
(* *)
(* Like if_stmt, the condition uses expr_ns to prevent struct *)
(* literal ambiguity with the body block's opening "{". *)
(* The body is a block_stmt, so braces are always required. *)
(* *)
(* Example: `while running { step(); }` *)
while_stmt = "while" , expr_ns , block_stmt ;
(* --- Infinite loop --- *)
(* *)
(* Executes the body unconditionally and indefinitely. The only *)
(* ways to exit are "break" or "return" inside the body. *)
(* There is no condition, so "loop" is followed directly by the *)
(* body block. *)
loop_stmt = "loop" , block_stmt ;
(* --- Break and continue --- *)
(* *)
(* "break" exits the immediately enclosing "while" or "loop". *)
(* "continue" skips the rest of the current iteration and jumps to *)
(* the next condition check (while) or iteration (loop). *)
(* Both are only valid inside a loop body; the compiler enforces *)
(* this as a semantic rule. *)
(* Neither statement takes an operand; the ";" is mandatory. *)
break_stmt = "break" , ";" ;
continue_stmt = "continue" , ";" ;
(* --- Block statement --- *)
(* *)
(* A block groups zero or more statements into a single statement *)
(* and introduces a new lexical scope. It does not produce a value. *)
(* *)
(* LL(1): at stmt level, "{" unambiguously selects block since no *)
(* other stmt alternative starts with "{". *)
(* Inside the block: "}" → end of block; any other token → parse *)
(* another stmt. *)
block_stmt = "{" , { stmt } , "}" ;
(* --- Let statement --- *)
(* *)
(* Declares a named binding in the current scope. Bindings are *)
(* immutable unless "mut" is given. *)
(* *)
(* Both the ": type" annotation and the "= expr" initialiser are *)
(* syntactically optional, but at least one must be present for the *)
(* binding to be usable; the compiler enforces this as a semantic *)
(* (not syntactic) rule. *)
(* *)
(* LL(1): each optional part is selected by one distinct token: *)
(* "mut" → mutable binding *)
(* ":" → type annotation follows *)
(* "=" → initialiser follows *)
let_stmt = "let" , [ "mut" ] , IDENT , [ ":" , type ] , [ "=" , expr ] , ";" ;
(* ================================================================ *)
(* Top-Level Definitions *)
(* ================================================================ *)
(* --- Function definition --- *)
(* *)
(* Defines a named function with a typed parameter list and an *)
(* optional return type. Omitting "->" implies a return type of (). *)
(* *)
(* LL(1) notes: *)
(* param_list: ")" → ε (empty list); else parse first param *)
(* param: "mut" → consume; IDENT → skip (mut absent) *)
(* [ "->" ... ]: consume iff next token is "->" *)
(* "->" is a two-character token, distinct from "{" (the only token *)
(* that can start the block_stmt that follows), so there is no *)
(* look-ahead conflict. *)
func_def = "fn" , IDENT , "(" , param_list , ")" ,
[ "->" , type ] ,
block_stmt ;
(* Zero or more comma-separated parameters; every "," must be *)
(* followed by another param, so no trailing comma is accepted. *)
param_list = [ param , { "," , param } ] ;
(* Each parameter is an optionally-mutable name with a required *)
(* type annotation. Mutability applies within the function body. *)
param = [ "mut" ] , IDENT , ":" , type ;
(* --- Struct definition --- *)
(* *)
(* Defines a named product type with zero or more typed fields. *)
(* Fields are separated by commas; no trailing comma is permitted. *)
(* *)
(* Example: `struct Point { x: i32, y: i32 }` *)
(* *)
(* LL(1) notes: *)
(* field_list: "}" → ε (empty struct); else parse first field *)
(* FIRST(field) = {IDENT}, FOLLOW(field_list) = {"}"} *)
(* Disjoint, so no look-ahead conflict. *)
(* top_level_def: "fn" → func_def; "struct" → struct_def *)
struct_def = "struct" , IDENT , "{" , field_list , "}" ;
field_list = [ field , { "," , field } ] ;
field = IDENT , ":" , type ;